Spaces:

AZLABS
/

Comic-2

Running

App Files Community

AZLABS committed on Nov 4, 2024

Commit

885164c

verified ·

1 Parent(s): 3b31d0b

Update app.py

Browse files

Files changed (1) hide show

app.py +218 -66

app.py CHANGED Viewed

@@ -11,20 +11,15 @@ import uuid
 import time
 import gradio as gr
 from typing import Tuple, List
-# Enhanced logging configuration
-log_dir = os.getenv('LOG_DIRECTORY', './')
-LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
 logging.basicConfig(
-    filename=LOGGER_FILE_PATH,
-    filemode='a',
-    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
-    datefmt='%Y-%b-%d %H:%M:%S',
     level=logging.INFO,
     handlers=[
-        logging.FileHandler(LOGGER_FILE_PATH),
-        logging.StreamHandler()
     ]
 )
 LOGGER = logging.getLogger(__name__)
@@ -32,120 +27,277 @@ LOGGER = logging.getLogger(__name__)
 class Text2Video:
     def __init__(self) -> None:
         """Initialize the Text2Video class."""
         self.herc = Hercai()
-        LOGGER.info("Initialized Text2Video with Hercai API")
     def get_image(self, img_prompt: str) -> str:
         """Generate an image based on the provided text prompt."""
         try:
-            # Enhanced prompt engineering similar to DALL-E 3
-            enhanced_prompt = (
-                f"Create a high-quality comic panel: {img_prompt}. "
-                "Style: Professional comic book illustration, "
-                "vivid colors, clear composition, dramatic lighting. "
-                "Include text as comic-style captions. "
-                "Resolution: High detail, 1792x1024 aspect ratio. "
-                "Quality: Professional grade comic art."
             )
-            result = self.herc.draw_image(
-                model="simurg",  # Using Hercai's best model
-                prompt=enhanced_prompt,
-                negative_prompt="blurry, low quality, poorly drawn, distorted"
             )
-            LOGGER.info(f"Successfully generated image for prompt: {img_prompt[:50]}...")
-            return result["url"]
         except Exception as e:
-            LOGGER.error(f"Error generating image: {e}")
             raise
     def download_img_from_url(self, image_url: str, image_path: str) -> str:
         """Download and process image from URL."""
         try:
             urllib.request.urlretrieve(image_url, image_path)
-            # Image processing for consistent quality
             img = Image.open(image_path)
-            target_size = (1792, 1024)  # Matching DALL-E 3 size
-            img = img.resize(target_size, Image.Resampling.LANCZOS)
-            img.save(image_path, quality=95)
-            LOGGER.info(f"Successfully downloaded and processed image: {image_path}")
             return image_path
         except Exception as e:
-            LOGGER.error(f"Error downloading image: {e}")
             raise
-    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
-        """Create video with enhanced quality settings."""
         try:
-            if len(image_files) != len(audio_files):
-                raise ValueError("Number of images doesn't match number of audio files")
             video_clips = []
-            for image_file, audio_file in zip(image_files, audio_files):
                 audio_clip = mp.AudioFileClip(audio_file)
                 video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                 video_clip = video_clip.set_audio(audio_clip)
                 video_clips.append(video_clip)
             final_clip = mp.concatenate_videoclips(video_clips)
-            # Enhanced video quality settings
             final_clip.write_videofile(
                 output_path,
                 codec='libx264',
                 fps=24,
                 audio_codec='aac',
                 audio_bitrate='192k',
-                preset='medium',
-                threads=4
             )
-            LOGGER.info("Video created successfully")
         except Exception as e:
-            LOGGER.error(f"Error creating video: {e}")
             raise
-    # [Previous methods remain the same]
     def gradio_interface(self):
-        """Create enhanced Gradio interface."""
         with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
             gr.HTML("""
-                <center>
-                    <h1 style="color:#fff">AI Comic Video Generator</h1>
-                    <p style="color:#ddd">Create engaging comic-style videos from your stories</p>
-                </center>
             """)
             with gr.Row():
                 input_text = gr.Textbox(
-                    label="Story Script",
                     placeholder="Enter your story (separate scenes with ,,)",
                     lines=5
                 )
             with gr.Row():
-                generate_btn = gr.Button("🎬 Generate Comic Video", variant="primary")
             with gr.Row():
                 output = gr.Video(label="Generated Comic Video")
-            example_txt = """Once upon a time in a magical forest,,
-            A brave knight discovered a mysterious crystal,,
-            The crystal began to glow with incredible power"""
-            gr.Examples([[example_txt]], [input_text])
-            generate_btn.click(
-                self.generate_video,
                 inputs=[input_text],
-                outputs=[output]
             )
-        demo.launch(debug=True)

 import time
 import gradio as gr
 from typing import Tuple, List
+import numpy as np
# Configure logging to both a file and the console.
# The file handler is opened as UTF-8 explicitly: the application's log
# messages contain emoji, and the platform's default locale encoding is not
# guaranteed to be able to represent them.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    handlers=[
        logging.FileHandler('app.log', encoding='utf-8'),
        logging.StreamHandler(),  # This will print to console
    ],
)
LOGGER = logging.getLogger(__name__)
 class Text2Video:
     def __init__(self) -> None:
         """Initialize the Text2Video class."""
+        LOGGER.info("Initializing Text2Video application...")
         self.herc = Hercai()
+        LOGGER.info("Hercai API initialized successfully")
     def get_image(self, img_prompt: str) -> str:
         """Generate an image based on the provided text prompt."""
         try:
+            LOGGER.info(f"🎨 Starting image generation for prompt: {img_prompt}")
+            # Enhanced prompt for better comic-style results
+            comic_style_prompt = (
+                f"{img_prompt}, comic book style, full scene composition, "
+                "vibrant colors, clear speech bubbles with text, "
+                "dramatic lighting, high contrast, detailed backgrounds, "
+                "comic book panel layout, professional illustration"
             )
+            LOGGER.info("📝 Enhanced prompt with comic style elements")
+            LOGGER.info(f"🔄 Sending request to Hercai API...")
+            image_result = self.herc.draw_image(
+                model="simurg",
+                prompt=comic_style_prompt,
+                negative_prompt="blurry, cropped, low quality, dark, gloomy"
             )
+            image_url = image_result["url"]
+            LOGGER.info(f"✅ Image generated successfully: {image_url}")
+            return image_url
         except Exception as e:
+            LOGGER.error(f"❌ Error generating image: {str(e)}")
             raise
     def download_img_from_url(self, image_url: str, image_path: str) -> str:
         """Download and process image from URL."""
         try:
+            LOGGER.info(f"📥 Downloading image from: {image_url}")
+            # Download image
             urllib.request.urlretrieve(image_url, image_path)
+            # Process image to ensure full coverage
             img = Image.open(image_path)
+            # Resize maintaining aspect ratio
+            target_size = (1024, 1024)
+            img.thumbnail(target_size, Image.Resampling.LANCZOS)
+            # Create new image with padding if needed
+            new_img = Image.new('RGB', target_size, (255, 255, 255))
+            offset = ((target_size[0] - img.size[0]) // 2,
+                     (target_size[1] - img.size[1]) // 2)
+            new_img.paste(img, offset)
+            # Save processed image
+            new_img.save(image_path, quality=95)
+            LOGGER.info(f"✅ Image processed and saved to: {image_path}")
             return image_path
         except Exception as e:
+            LOGGER.error(f"❌ Error processing image: {str(e)}")
             raise
+    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
+        """Convert text to speech with enhanced quality."""
         try:
+            LOGGER.info(f"🔊 Converting text to audio: {img_prompt}")
+            # Create audio with enhanced parameters
+            tts = gTTS(text=img_prompt, lang='en', slow=False)
+            LOGGER.info("📝 Audio conversion complete")
+            # Save audio file
+            tts.save(audio_path)
+            LOGGER.info(f"✅ Audio saved to: {audio_path}")
+            return audio_path
+        except Exception as e:
+            LOGGER.error(f"❌ Error in audio conversion: {str(e)}")
+            raise
+    def get_images_and_audio(self, list_prompts: List[str]) -> Tuple[List[str], List[str]]:
+        """Process multiple prompts to generate images and audio."""
+        img_list = []
+        audio_paths = []
+        LOGGER.info(f"🎬 Starting batch processing of {len(list_prompts)} prompts")
+        for idx, img_prompt in enumerate(list_prompts, 1):
+            try:
+                LOGGER.info(f"📍 Processing prompt {idx}/{len(list_prompts)}")
+                # Generate unique identifier
+                unique_id = uuid.uuid4().hex[:8]
+                # Process image
+                image_path = f"scene_{idx}_{unique_id}.png"
+                img_url = self.get_image(img_prompt)
+                image = self.download_img_from_url(img_url, image_path)
+                img_list.append(image)
+                # Process audio
+                audio_path = f"audio_{idx}_{unique_id}.mp3"
+                audio = self.text_to_audio(img_prompt, audio_path)
+                audio_paths.append(audio)
+                LOGGER.info(f"✅ Completed processing prompt {idx}")
+            except Exception as e:
+                LOGGER.error(f"❌ Error processing prompt {idx}: {str(e)}")
+                raise
+        return img_list, audio_paths
+    def create_video_from_images_and_audio(self, image_files: List[str],
+                                         audio_files: List[str],
+                                         output_path: str) -> None:
+        """Create final video with enhanced quality."""
+        try:
+            LOGGER.info("🎥 Starting video creation process")
+            if len(image_files) != len(audio_files):
+                raise ValueError("Number of images and audio files don't match")
             video_clips = []
+            for idx, (image_file, audio_file) in enumerate(zip(image_files, audio_files), 1):
+                LOGGER.info(f"🔄 Processing scene {idx}/{len(image_files)}")
+                # Load audio and create video clip
                 audio_clip = mp.AudioFileClip(audio_file)
                 video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                 video_clip = video_clip.set_audio(audio_clip)
                 video_clips.append(video_clip)
+                LOGGER.info(f"✅ Scene {idx} processed successfully")
+            LOGGER.info("🔄 Concatenating all scenes")
             final_clip = mp.concatenate_videoclips(video_clips)
+            LOGGER.info("💾 Writing final video file")
             final_clip.write_videofile(
                 output_path,
                 codec='libx264',
                 fps=24,
                 audio_codec='aac',
                 audio_bitrate='192k',
+                preset='medium'
             )
+            LOGGER.info("✅ Video created successfully")
         except Exception as e:
+            LOGGER.error(f"❌ Error in video creation: {str(e)}")
+            raise
+    def generate_video(self, text: str) -> str:
+        """Main function to generate video from text."""
+        try:
+            LOGGER.info("🎬 Starting video generation process")
+            # Split text into prompts
+            list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
+            LOGGER.info(f"📝 Processed {len(list_prompts)} scenes from input text")
+            output_path = f"comic_video_{uuid.uuid4().hex[:8]}.mp4"
+            # Generate images and audio
+            img_list, audio_paths = self.get_images_and_audio(list_prompts)
+            # Create final video
+            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
+            LOGGER.info(f"✅ Video generation completed: {output_path}")
+            return output_path
+        except Exception as e:
+            LOGGER.error(f"❌ Error in video generation: {str(e)}")
             raise
     def gradio_interface(self):
+        """Create Gradio interface."""
+        LOGGER.info("🌐 Initializing Gradio interface")
+        # Example stories of different lengths
+        short_story = """A little bird found a shiny seed,,
+        The bird planted it in the garden,,
+        The next day, a magical flower grew,,
+        The bird and flower became best friends"""
+        medium_story = """In a bustling city, a lonely robot worked in a toy shop,,
+        Every night, the robot would fix broken toys while everyone slept,,
+        One day, a little girl noticed the robot's kindness,,
+        She started leaving thank you notes for the robot,,
+        The robot began making special toys just for her,,
+        Soon, other children discovered the robot's magical toys,,
+        The toy shop became famous for its special toys,,
+        The robot wasn't lonely anymore, surrounded by happy children"""
+        long_story = """Deep in the enchanted forest, there lived a young wizard named Leo who couldn't control his magic,,
+        Every time Leo tried to cast a spell, unexpected things would happen - flowers turned into butterflies, rain became candy, and his cat could suddenly speak French,,
+        One day, Leo discovered an ancient book hidden beneath a talking tree,,
+        The book revealed that his 'mistakes' were actually rare gifts - the ability to bring joy and wonder to the world,,
+        Excited about his discovery, Leo started practicing his unique magic in secret,,
+        He created a garden where flowers sang lullabies,,
+        He made clouds that rained rainbow bubbles,,
+        He transformed ordinary objects into extraordinary treasures,,
+        Word spread about the magical happenings in the forest,,
+        Children from nearby villages would visit to see Leo's wonderful creations,,
+        Adults who had forgotten about magic began to believe again,,
+        The forest became a place of wonder and happiness,,
+        Leo realized that sometimes the best magic comes from embracing what makes you different,,
+        And so, the once-frustrated wizard became known as Leo the Wonderful, master of joyful surprises"""
         with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
             gr.HTML("""
+                <center><h1 style="color:#fff">Comic Video Generator</h1></center>
+                <center><p style="color:#ccc">Create animated comics from your stories! Separate scenes with double commas (,,)</p></center>
             """)
             with gr.Row():
                 input_text = gr.Textbox(
+                    label="Comic Script",
                     placeholder="Enter your story (separate scenes with ,,)",
                     lines=5
                 )
             with gr.Row():
+                generate_btn = gr.Button("🎬 Generate Video", variant="primary")
             with gr.Row():
                 output = gr.Video(label="Generated Comic Video")
+            # Examples section with three stories
+            gr.Examples(
+                examples=[
+                    [short_story],
+                    [medium_story],
+                    [long_story]
+                ],
                 inputs=[input_text],
+                label="Story Examples",
+                headers=[
+                    "Short Story (4 scenes)",
+                    "Medium Story (8 scenes)",
+                    "Long Story (14 scenes)"
+                ]
             )
+            # Add some helpful tips
+            gr.HTML("""
+                <div style="color:#ccc; padding: 20px; margin-top: 20px; background: rgba(0,0,0,0.1); border-radius: 10px;">
+                    <h3>Tips for Better Results:</h3>
+                    <ul>
+                        <li>Separate each scene with double commas (,,)</li>
+                        <li>Keep each scene description clear and vivid</li>
+                        <li>Include character emotions and actions</li>
+                        <li>Describe the setting and atmosphere</li>
+                        <li>Aim for 3-15 scenes for best results</li>
+                    </ul>
+                </div>
+            """)
+            generate_btn.click(self.generate_video, inputs=[input_text], outputs=[output])
+        LOGGER.info("✅ Gradio interface initialized")
+        demo.launch(debug=True)
# Script entry point: build the application object and launch its Gradio UI.
if __name__ == "__main__":
    text2video = Text2Video()
    text2video.gradio_interface()