import json
import logging
import os
import shutil
import time
import urllib.request
import uuid
from typing import Tuple, List

import cv2
import gradio as gr
import moviepy.editor as mp
import numpy as np
from gtts import gTTS
from hercai import Hercai
from PIL import Image


# Send every INFO-and-above record to both app.log and the console.
# The file handler is pinned to UTF-8 because log messages embed the
# user-supplied prompts, which may contain characters the platform's
# default encoding cannot represent.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    handlers=[
        logging.FileHandler('app.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)

# Module-level logger shared by everything in this file.
LOGGER = logging.getLogger(__name__)


class Text2Video:
    """Generate a narrated comic-style slideshow video from a text script.

    The script is split into scenes on ``,,``. For every scene an image is
    requested from the Hercai API and a narration track is synthesised with
    gTTS; moviepy then shows each image for the length of its narration and
    joins the scenes into a single MP4 file.
    """

    # Marker that separates scenes in the user's script.
    SCENE_SEPARATOR = ",,"
    # (width, height) every downloaded image is resized to, so that all
    # scenes share one frame size when they are concatenated.
    IMAGE_SIZE = (1792, 1024)
    # Seconds a single blocking network operation may stall while an image
    # is being downloaded before the download is abandoned.
    DOWNLOAD_TIMEOUT = 60

    def __init__(self) -> None:
        """Create the Hercai client used for image generation."""
        LOGGER.info("Initializing Text2Video application...")
        self.herc = Hercai()
        LOGGER.info("Hercai API initialized successfully")

    @classmethod
    def _split_scenes(cls, text: str) -> List[str]:
        """Split *text* on the scene separator, dropping blank scenes."""
        if not text:
            return []
        stripped = (part.strip() for part in text.split(cls.SCENE_SEPARATOR))
        return [part for part in stripped if part]

    @staticmethod
    def _remove_files(paths: List[str]) -> None:
        """Delete every file in *paths* on a best-effort basis."""
        for path in paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                # Never created, or already removed: nothing to clean up.
                pass
            except OSError as exc:
                LOGGER.warning("Could not remove temporary file %s: %s", path, exc)

    @staticmethod
    def _close_clips(clips) -> None:
        """Close every moviepy clip in *clips*, logging instead of raising."""
        for clip in clips:
            try:
                clip.close()
            except Exception as exc:
                # Cleanup must never mask the error that triggered it.
                LOGGER.warning("Failed to close clip: %s", exc)

    def get_image(self, img_prompt: str) -> str:
        """Request a comic-style image for *img_prompt* from Hercai.

        Args:
            img_prompt: Scene description supplied by the user.

        Returns:
            The value stored under ``"url"`` in the Hercai response.

        Raises:
            Exception: Any error raised by the Hercai client or by reading
                the response is logged and re-raised unchanged.
        """
        try:
            LOGGER.info("Starting image generation for prompt: %s", img_prompt)

            # Style keywords are appended so every scene shares one look.
            comic_style_prompt = (
                f"{img_prompt}, comic book style, full scene composition, "
                "vibrant colors, clear speech bubbles with text, "
                "dramatic lighting, high contrast, detailed backgrounds, "
                "comic book panel layout, professional illustration"
            )
            LOGGER.info("Enhanced prompt with comic style elements")
            LOGGER.info("Sending request to Hercai API...")

            image_result = self.herc.draw_image(
                model="v3",
                prompt=comic_style_prompt,
                negative_prompt="blurry, cropped, low quality, dark, gloomy",
            )

            image_url = image_result["url"]
            LOGGER.info("Image generated successfully: %s", image_url)
            return image_url

        except Exception as e:
            LOGGER.error("Error generating image: %s", e)
            raise

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """Download the image at *image_url* and resize it in place.

        Args:
            image_url: Location of the generated image.
            image_path: Local file the image is written to; it is then
                overwritten with the version resized to ``IMAGE_SIZE``.

        Returns:
            *image_path*.

        Raises:
            Exception: Download or image-processing errors are logged and
                re-raised unchanged.
        """
        try:
            # urlopen with an explicit timeout, so a stalled server cannot
            # block the request forever (urlretrieve offers no timeout).
            with urllib.request.urlopen(
                image_url, timeout=self.DOWNLOAD_TIMEOUT
            ) as response:
                with open(image_path, "wb") as image_file:
                    shutil.copyfileobj(response, image_file)

            # resize() returns a new in-memory image, so the source file can
            # be closed before the same path is overwritten.
            with Image.open(image_path) as source_image:
                resized = source_image.resize(
                    self.IMAGE_SIZE, Image.Resampling.LANCZOS
                )
            resized.save(image_path, quality=95)

            LOGGER.info("Successfully downloaded and processed image: %s", image_path)
            return image_path

        except Exception as e:
            LOGGER.error("Error downloading image: %s", e)
            raise

    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
        """Synthesise English speech for *img_prompt* and save it.

        Args:
            img_prompt: Text to be spoken.
            audio_path: File the gTTS output is saved to.

        Returns:
            *audio_path*.

        Raises:
            Exception: Any gTTS error is logged and re-raised unchanged.
        """
        try:
            LOGGER.info("Converting text to audio: %s", img_prompt)

            tts = gTTS(text=img_prompt, lang='en', slow=False)
            LOGGER.info("Audio conversion complete")

            tts.save(audio_path)
            LOGGER.info("Audio saved to: %s", audio_path)

            return audio_path

        except Exception as e:
            LOGGER.error("Error in audio conversion: %s", e)
            raise

    def get_images_and_audio(self, list_prompts: List[str]) -> Tuple[List[str], List[str]]:
        """Generate one image file and one audio file per prompt.

        Args:
            list_prompts: Scene descriptions, in playback order.

        Returns:
            ``(image_paths, audio_paths)``, both in prompt order.

        Raises:
            Exception: The first failure is logged and re-raised. Files that
                were already produced for this batch are deleted first,
                because the caller never receives their paths.
        """
        img_list: List[str] = []
        audio_paths: List[str] = []
        total = len(list_prompts)

        LOGGER.info("Starting batch processing of %d prompts", total)

        for idx, img_prompt in enumerate(list_prompts, 1):
            # A random suffix keeps concurrent requests from overwriting
            # each other's files.
            unique_id = uuid.uuid4().hex[:8]
            image_path = f"scene_{idx}_{unique_id}.png"
            audio_path = f"audio_{idx}_{unique_id}.mp3"

            try:
                LOGGER.info("Processing prompt %d/%d", idx, total)

                img_url = self.get_image(img_prompt)
                img_list.append(self.download_img_from_url(img_url, image_path))
                audio_paths.append(self.text_to_audio(img_prompt, audio_path))

                LOGGER.info("Completed processing prompt %d", idx)

            except Exception as e:
                LOGGER.error("Error processing prompt %d: %s", idx, e)
                self._remove_files(
                    [*img_list, *audio_paths, image_path, audio_path]
                )
                raise

        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: List[str],
                                           audio_files: List[str],
                                           output_path: str) -> None:
        """Render the image/audio pairs into one video file.

        Each image is displayed for the duration of its audio track and the
        resulting scenes are concatenated in order.

        Args:
            image_files: Paths of the scene images.
            audio_files: Paths of the narration tracks, one per image.
            output_path: File the encoded video is written to.

        Raises:
            ValueError: If the two lists differ in length or are empty.
            Exception: Any moviepy error is logged and re-raised unchanged.
        """
        audio_clips = []
        video_clips = []
        final_clip = None

        try:
            LOGGER.info("Starting video creation process")

            if len(image_files) != len(audio_files):
                raise ValueError("Number of images and audio files don't match")
            if not image_files:
                raise ValueError("No scenes to render: image and audio lists are empty")

            total = len(image_files)
            for idx, (image_file, audio_file) in enumerate(zip(image_files, audio_files), 1):
                LOGGER.info("Processing scene %d/%d", idx, total)

                audio_clip = mp.AudioFileClip(audio_file)
                audio_clips.append(audio_clip)

                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clips.append(video_clip.set_audio(audio_clip))

                LOGGER.info("Scene %d processed successfully", idx)

            LOGGER.info("Concatenating all scenes")
            final_clip = mp.concatenate_videoclips(video_clips)

            LOGGER.info("Writing final video file")
            final_clip.write_videofile(
                output_path,
                codec='libx264',
                fps=24,
                audio_codec='aac',
                audio_bitrate='192k',
                preset='medium',
            )

            LOGGER.info("Video created successfully")

        except Exception as e:
            LOGGER.error("Error in video creation: %s", e)
            raise

        finally:
            # Release whatever file readers the clips still hold, whether or
            # not rendering succeeded.
            opened = [*video_clips, *audio_clips]
            if final_clip is not None:
                opened.insert(0, final_clip)
            self._close_clips(opened)

    def generate_video(self, text: str) -> str:
        """Turn a ``,,``-separated script into a video file.

        Args:
            text: The script; scenes are separated by ``,,`` and blank
                scenes are ignored.

        Returns:
            Path of the generated ``comic_video_<id>.mp4`` file.

        Raises:
            ValueError: If *text* contains no non-blank scene.
            Exception: Any failure in image, audio or video generation is
                logged and re-raised unchanged.
        """
        img_list: List[str] = []
        audio_paths: List[str] = []

        try:
            LOGGER.info("Starting video generation process")

            list_prompts = self._split_scenes(text)
            LOGGER.info("Processed %d scenes from input text", len(list_prompts))

            if not list_prompts:
                raise ValueError(
                    "No scenes found: enter some text, separating scenes with ',,'"
                )

            output_path = f"comic_video_{uuid.uuid4().hex[:8]}.mp4"

            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)

            LOGGER.info("Video generation completed: %s", output_path)
            return output_path

        except Exception as e:
            LOGGER.error("Error in video generation: %s", e)
            raise

        finally:
            # The per-scene images and audio are only inputs to the final
            # video; remove them so repeated requests do not fill the disk.
            self._remove_files([*img_list, *audio_paths])

    def gradio_interface(self):
        """Build the Gradio UI around ``generate_video`` and launch it."""
        LOGGER.info("Initializing Gradio interface")

        with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
            gr.HTML("""
            <center><h1 style="color:#fff">Comic Video Generator</h1></center>
            """)

            with gr.Row():
                input_text = gr.Textbox(
                    label="Comic Script",
                    placeholder="Enter your story (separate scenes with ,,)",
                )

            with gr.Row():
                generate_btn = gr.Button("Generate Video")

            with gr.Row():
                output = gr.Video(label="Generated Comic Video")

            # Sample three-scene script offered as a clickable example.
            example_txt = (
                "once upon a time there was a village. It was a nice place to live, "
                "except for one thing. people did not like to share.,, "
                "One day a visitor came to town.\n"
                "'Hello. Does anybody have food to share?' He asked. "
                "'No', said everyone.,,\n"
                "That's okay', said the visitor. 'I will make stone soup for everyone'."
                "Then he took a stone and dropped it into a giant pot,,"
            )
            gr.Examples([[example_txt]], [input_text])

            generate_btn.click(self.generate_video, inputs=[input_text], outputs=[output])

        LOGGER.info("Gradio interface initialized")
        demo.launch(debug=True)


if __name__ == "__main__":
    # Run as a script: build the application and serve its Gradio UI.
    app = Text2Video()
    app.gradio_interface()