"""Generate narrated comic-style videos from text prompts via a Gradio UI.

Each scene of the input text is turned into an AI-generated image and a
gTTS audio narration; the image/audio pairs are then concatenated into a
single video with MoviePy.
"""

import json
import logging
import os
import re
import time
import urllib.request
import uuid

import cv2
import gradio as gr
import moviepy.editor as mp
import requests
from gtts import gTTS
from hercai import Hercai
from PIL import Image

# Configure detailed logging
log_dir = os.getenv('LOG_DIRECTORY', './')  # Log directory from environment, default to current directory
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')  # Full path to the log file

logging.basicConfig(
    filename=LOGGER_FILE_PATH,
    filemode='a',  # Append to the log file
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',  # Log format
    datefmt='%Y-%b-%d %H:%M:%S',  # Date and time format
)

LOGGER = logging.getLogger(__name__)  # Module-level logger instance

log_level_env = os.getenv('LOG_LEVEL', 'INFO')  # Log level name from environment, default to INFO

log_level_dict = {  # Maps log level names to their numerical values
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}

# Fall back to INFO when the environment variable holds an unknown level name
log_level = log_level_dict.get(log_level_env, logging.INFO)
LOGGER.setLevel(log_level)

# Upper bound (seconds) for outbound HTTP API calls so a stalled service
# cannot block a video request forever.
_REQUEST_TIMEOUT_SECONDS = 60

# Characters outside this set are replaced when a prompt is used in a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9_-]+")


class Text2Video:
    """
    A class to generate videos from text prompts, with detailed logging,
    model selection, and a user-friendly interface.
    """

    def __init__(self) -> None:
        """
        Initialize the Text2Video class and its Hercai client.
        """
        LOGGER.info("Initializing Text2Video class")
        self.herc = Hercai("")  # Replace "" with your actual Hercai API key if you have one
        LOGGER.info("Hercai initialized successfully")

    def get_image(self, img_prompt: str, image_generator: str, image_model: str) -> str:
        """
        Generate an image from a text prompt using the selected AI model,
        with detailed logging and comic book styling.

        Args:
            img_prompt (str): The text prompt to generate the image from.
            image_generator (str): The name of the AI image generation service
                (Hercai, Prodia, or Pollinations).
            image_model (str): The specific model to use within the selected
                AI image generation service.

        Returns:
            str: The URL of the generated image. Returns an empty string if an
                error occurred, if the service returned no image, or if the
                generator is not implemented/recognized.
        """
        LOGGER.info(f"Generating image for prompt: {img_prompt}")
        try:
            # Create a comic book style prompt
            modified_prompt = (
                f"Generate a comic book style image with speech bubbles containing the following text: '{img_prompt}'. "
                f"Include elements like vibrant colors, onomatopoeia, and exaggerated expressions to enhance the comic book aesthetic."
            )
            LOGGER.info(f"Modified prompt for {image_generator}: {modified_prompt}")

            image_url = ""

            if image_generator == "Hercai":
                LOGGER.info(f"Using Hercai model: {image_model}")
                image_result = self.herc.draw_image(
                    model=image_model,
                    prompt=modified_prompt,
                    negative_prompt="Dark and gloomy",
                )
                # Extract the image URL from the result
                image_url = image_result["url"]

            elif image_generator == "Prodia":
                LOGGER.info(f"Using Prodia model: {image_model}")
                api_url = "https://api.prodia.com/v1/generate"
                payload = {
                    "model": image_model,
                    "prompt": modified_prompt,
                    "negative_prompt": "Dark and gloomy",
                }
                # Prefer a key supplied through the environment; the literal
                # placeholder is kept as the fallback for backward compatibility.
                prodia_api_key = os.getenv("PRODIA_API_KEY", "YOUR_PRODIA_API_KEY")
                headers = {
                    "Authorization": f"Bearer {prodia_api_key}"
                }
                # Without a timeout, requests waits indefinitely on a stalled server.
                response = requests.post(
                    api_url,
                    json=payload,
                    headers=headers,
                    timeout=_REQUEST_TIMEOUT_SECONDS,
                )
                if response.status_code == 200:
                    image_url = response.json()["url"]
                    LOGGER.info(f"Image generated successfully using Prodia: {image_url}")
                else:
                    LOGGER.error(f"Error generating image using Prodia: {response.text}")

            elif image_generator == "Pollinations":
                LOGGER.info(f"Using Pollinations model: {image_model}")
                # Implement Pollinations API call here, similar to Prodia.
                # Until then this branch leaves image_url empty.

            else:
                LOGGER.warning(f"Unknown image generator: {image_generator}")

            # Only report success when a URL was actually obtained
            if image_url:
                LOGGER.info(f"Image generated successfully: {image_url}")
            else:
                LOGGER.warning(f"No image URL obtained from {image_generator} for prompt: {img_prompt}")
            return image_url

        except Exception as e:
            # Log any errors encountered during image generation
            LOGGER.error(f"Error generating image for prompt '{img_prompt}' using {image_generator}: {e}")
            return ""

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """
        Download an image from a URL to a local file path.

        Args:
            image_url (str): The URL of the image to download.
            image_path (str): The local file path to save the downloaded image.

        Returns:
            str: The local file path of the downloaded image. Returns an empty
                string if the URL is empty or an error occurred.
        """
        LOGGER.info(f"Downloading image from URL: {image_url}")
        if not image_url:
            # get_image signals failure with an empty URL; nothing to fetch
            LOGGER.error("Error downloading image: empty image URL")
            return ""
        try:
            urllib.request.urlretrieve(image_url, image_path)
            LOGGER.info(f"Image downloaded and saved to: {image_path}")
            return image_path
        except Exception as e:
            # Log any errors encountered during image download
            LOGGER.error(f"Error downloading image from URL '{image_url}': {e}")
            return ""

    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
        """
        Convert text to speech using gTTS and save it as an audio file.

        Args:
            img_prompt (str): The text to convert to speech.
            audio_path (str): The local file path to save the generated audio file.

        Returns:
            str: The local file path of the saved audio file. Returns an empty
                string if an error occurred.
        """
        LOGGER.info(f"Converting text to audio: {img_prompt}")
        try:
            # Set the language for speech synthesis (English in this case)
            language = 'en'
            speech = gTTS(text=img_prompt, lang=language, slow=False)
            speech.save(audio_path)
            LOGGER.info(f"Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            # Log any errors encountered during text-to-audio conversion
            LOGGER.error(f"Error converting text '{img_prompt}' to audio: {e}")
            return ""

    def get_images_and_audio(self, list_prompts: list, image_generator: str, image_model: str) -> tuple:
        """
        Generate images and corresponding audio files for a list of text
        prompts using the selected AI model.

        A prompt whose image or audio could not be produced is skipped, so the
        two returned lists always have the same length and contain only paths
        of files that were reported as written.

        Args:
            list_prompts (list): A list of text prompts.
            image_generator (str): The name of the AI image generation service
                (Hercai, Prodia, or Pollinations).
            image_model (str): The specific model to use within the selected
                AI image generation service.

        Returns:
            tuple: A tuple containing two lists: image paths and audio paths.
        """
        LOGGER.info("Generating images and audio for prompts")
        img_list = []  # List to store image paths
        audio_paths = []  # List to store audio paths

        for img_prompt in list_prompts:
            LOGGER.info(f"Processing prompt: {img_prompt}")
            try:
                # Unique identifier shared by the image and audio file of this prompt
                unique_id = uuid.uuid4().hex
                # Prompt text may contain path separators, quotes or newlines;
                # keep only characters that are safe in a file name.
                name_prefix = _UNSAFE_FILENAME_CHARS.sub("_", img_prompt[:9])
                image_path = f"{name_prefix}_{unique_id}.png"
                audio_path = f"{name_prefix}_{unique_id}.mp3"

                img_url = self.get_image(img_prompt, image_generator, image_model)
                image = self.download_img_from_url(img_url, image_path)
                if not image:
                    LOGGER.error(f"Skipping prompt '{img_prompt}': image could not be obtained")
                    continue

                audio = self.text_to_audio(img_prompt, audio_path)
                if not audio:
                    LOGGER.error(f"Skipping prompt '{img_prompt}': audio could not be generated")
                    continue

                # Append as a pair so both lists stay aligned
                img_list.append(image)
                audio_paths.append(audio)
            except Exception as e:
                # Log any errors encountered during the process
                LOGGER.error(f"Error processing prompt '{img_prompt}': {e}")

        LOGGER.info("Images and audio generated successfully")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
        """
        Generate a video from a list of image files and corresponding audio files.

        Each image is shown for the duration of its audio file; the resulting
        clips are concatenated in order. Errors are logged, not raised.

        Args:
            image_files (list): A list of local file paths to image files.
            audio_files (list): A list of local file paths to audio files.
            output_path (str): The local file path where the generated video
                will be saved.
        """
        LOGGER.info("Creating video from images and audio")
        audio_clips = []  # Kept so their file readers can be released afterwards
        final_clip = None
        try:
            # Check if the number of images and audio files match
            if len(image_files) != len(audio_files):
                LOGGER.error("Error: Number of images doesn't match the number of audio files.")
                return

            if not image_files:
                LOGGER.error("Error: No images and audio files to create a video from.")
                return

            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                LOGGER.info(f"Processing image: {image_file}, audio: {audio_file}")
                audio_clip = mp.AudioFileClip(audio_file)
                audio_clips.append(audio_clip)
                # Show the still image for exactly as long as its narration lasts
                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clip = video_clip.set_audio(audio_clip)
                video_clips.append(video_clip)

            # Concatenate all the video clips into a single video clip
            final_clip = mp.concatenate_videoclips(video_clips)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)
            LOGGER.info(f"Video created successfully at: {output_path}")
        except Exception as e:
            # Log any errors encountered during video creation
            LOGGER.error(f"Error creating video: {e}")
        finally:
            # Release resources held by the clips; cleanup failures must not
            # mask the outcome above.
            clips_to_close = list(audio_clips)
            if final_clip is not None:
                clips_to_close.append(final_clip)
            for clip in clips_to_close:
                try:
                    clip.close()
                except Exception as close_error:
                    LOGGER.warning(f"Error closing clip: {close_error}")

    def generate_video(self, text: str, image_generator: str, image_model: str) -> str:
        """
        Generate a video from a string of text prompts using the selected AI model.

        Args:
            text (str): Text prompts separated by double commas (",,"), where
                each prompt represents a scene or frame in the video.
            image_generator (str): The name of the AI image generation service
                (Hercai, Prodia, or Pollinations).
            image_model (str): The specific model to use within the selected
                AI image generation service.

        Returns:
            str: The file path of the generated video file. Returns an empty
                string if an error occurred or no video could be produced.
        """
        LOGGER.info("Generating video from text")
        try:
            # Split the input text into a list of prompts
            list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
            LOGGER.info(f"Prompts extracted from text: {list_prompts}")
            if not list_prompts:
                LOGGER.error("No prompts found in the input text")
                return ""

            # Define the output path for the generated video
            output_path = "output_video.mp4"
            # Remove any video left by a previous run so a failed generation
            # cannot be mistaken for a fresh result.
            if os.path.exists(output_path):
                os.remove(output_path)

            img_list, audio_paths = self.get_images_and_audio(list_prompts, image_generator, image_model)
            if not img_list:
                LOGGER.error("No images and audio could be generated for the given prompts")
                return ""

            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
            # The video builder logs its own errors instead of raising, so
            # verify that the file was actually written.
            if not os.path.isfile(output_path):
                LOGGER.error(f"Video file was not created: {output_path}")
                return ""

            LOGGER.info(f"Video generated successfully: {output_path}")
            return output_path
        except Exception as e:
            # Log any errors encountered during video generation
            LOGGER.error(f"Error generating video from text '{text}': {e}")
            return ""

    def gradio_interface(self):
        """
        Creates a user-friendly Gradio interface for the video generation
        application and launches it.
        """
        LOGGER.info("Launching Gradio interface")
        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            # Set the title of the application
            gr.HTML("\n\nComics Video Generator\n\n")

            # Text box for user input, allowing them to enter comic book text
            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(
                    label="Comics Text",
                    placeholder="Enter the comics text, separating scenes with double commas (,,)",
                )

            # Dropdown menu for selecting the AI image generation service
            with gr.Row(elem_id="col-container"):
                image_generator = gr.Dropdown(
                    label="Image Generator",
                    choices=["Hercai", "Prodia", "Pollinations"],
                    value="Hercai",
                    interactive=True,
                )

            # Dropdown menu for selecting the specific model within the chosen service
            with gr.Row(elem_id="col-container"):
                image_model = gr.Dropdown(
                    label="Image Model",
                    choices=["v1", "v2", "v3", "simurg", "animefy", "raava", "shonin"],
                    value="v3",
                    interactive=True,
                )

            # Button that triggers the video generation process
            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")

            # Component to display the generated video
            with gr.Row(elem_id="col-container"):
                output = gr.PlayableVideo()

            # Example to guide users on how to format their input
            with gr.Row(elem_id="col-container"):
                example_txt = (
                    "Once upon a time there was a village. It was a nice place to live, except for one thing. "
                    "People did not like to share.,, One day a visitor came to town. "
                    "'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,, "
                    "'That's okay', said the visitor. 'I will make stone soup for everyone'. \n"
                    "Then he took a stone and dropped it into a giant pot,,"
                )
                example = gr.Examples([example_txt], input_text)

            # Call generate_video with the user's input and model selection on click
            button.click(self.generate_video, [input_text, image_generator, image_model], output)

            LOGGER.info("Gradio interface launched successfully")

        # Launch the Gradio interface
        demo.launch(debug=True)


if __name__ == "__main__":
    LOGGER.info("Starting application")
    text2video = Text2Video()  # Create an instance of the Text2Video class
    text2video.gradio_interface()  # Launch the Gradio interface