AZLABS committed on
Commit
885164c
·
verified ·
1 Parent(s): 3b31d0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -66
app.py CHANGED
@@ -11,20 +11,15 @@ import uuid
11
  import time
12
  import gradio as gr
13
  from typing import Tuple, List
 
14
 
15
- # Enhanced logging configuration
16
- log_dir = os.getenv('LOG_DIRECTORY', './')
17
- LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
18
-
19
  logging.basicConfig(
20
- filename=LOGGER_FILE_PATH,
21
- filemode='a',
22
- format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
23
- datefmt='%Y-%b-%d %H:%M:%S',
24
  level=logging.INFO,
 
25
  handlers=[
26
- logging.FileHandler(LOGGER_FILE_PATH),
27
- logging.StreamHandler()
28
  ]
29
  )
30
  LOGGER = logging.getLogger(__name__)
@@ -32,120 +27,277 @@ LOGGER = logging.getLogger(__name__)
32
  class Text2Video:
33
  def __init__(self) -> None:
34
  """Initialize the Text2Video class."""
 
35
  self.herc = Hercai()
36
- LOGGER.info("Initialized Text2Video with Hercai API")
37
 
38
  def get_image(self, img_prompt: str) -> str:
39
  """Generate an image based on the provided text prompt."""
40
  try:
41
- # Enhanced prompt engineering similar to DALL-E 3
42
- enhanced_prompt = (
43
- f"Create a high-quality comic panel: {img_prompt}. "
44
- "Style: Professional comic book illustration, "
45
- "vivid colors, clear composition, dramatic lighting. "
46
- "Include text as comic-style captions. "
47
- "Resolution: High detail, 1792x1024 aspect ratio. "
48
- "Quality: Professional grade comic art."
49
  )
50
-
51
- result = self.herc.draw_image(
52
- model="simurg", # Using Hercai's best model
53
- prompt=enhanced_prompt,
54
- negative_prompt="blurry, low quality, poorly drawn, distorted"
 
 
 
55
  )
56
-
57
- LOGGER.info(f"Successfully generated image for prompt: {img_prompt[:50]}...")
58
- return result["url"]
 
59
 
60
  except Exception as e:
61
- LOGGER.error(f"Error generating image: {e}")
62
  raise
63
 
64
  def download_img_from_url(self, image_url: str, image_path: str) -> str:
65
  """Download and process image from URL."""
66
  try:
 
 
 
67
  urllib.request.urlretrieve(image_url, image_path)
68
 
69
- # Image processing for consistent quality
70
  img = Image.open(image_path)
71
- target_size = (1792, 1024) # Matching DALL-E 3 size
72
- img = img.resize(target_size, Image.Resampling.LANCZOS)
73
- img.save(image_path, quality=95)
74
 
75
- LOGGER.info(f"Successfully downloaded and processed image: {image_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  return image_path
77
 
78
  except Exception as e:
79
- LOGGER.error(f"Error downloading image: {e}")
80
  raise
81
 
82
- def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
83
- """Create video with enhanced quality settings."""
84
  try:
85
- if len(image_files) != len(audio_files):
86
- raise ValueError("Number of images doesn't match number of audio files")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
 
 
 
 
 
 
 
 
 
 
88
  video_clips = []
89
- for image_file, audio_file in zip(image_files, audio_files):
 
 
 
90
  audio_clip = mp.AudioFileClip(audio_file)
91
  video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
92
  video_clip = video_clip.set_audio(audio_clip)
93
  video_clips.append(video_clip)
94
-
 
 
 
95
  final_clip = mp.concatenate_videoclips(video_clips)
96
 
97
- # Enhanced video quality settings
98
  final_clip.write_videofile(
99
  output_path,
100
  codec='libx264',
101
  fps=24,
102
  audio_codec='aac',
103
  audio_bitrate='192k',
104
- preset='medium',
105
- threads=4
106
  )
107
 
108
- LOGGER.info("Video created successfully")
109
 
110
  except Exception as e:
111
- LOGGER.error(f"Error creating video: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  raise
113
 
114
- # [Previous methods remain the same]
115
 
116
  def gradio_interface(self):
117
- """Create enhanced Gradio interface."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
119
  gr.HTML("""
120
- <center>
121
- <h1 style="color:#fff">AI Comic Video Generator</h1>
122
- <p style="color:#ddd">Create engaging comic-style videos from your stories</p>
123
- </center>
124
  """)
125
-
126
  with gr.Row():
127
  input_text = gr.Textbox(
128
- label="Story Script",
129
  placeholder="Enter your story (separate scenes with ,,)",
130
  lines=5
131
  )
132
-
133
  with gr.Row():
134
- generate_btn = gr.Button("🎬 Generate Comic Video", variant="primary")
135
-
136
  with gr.Row():
137
  output = gr.Video(label="Generated Comic Video")
138
-
139
- example_txt = """Once upon a time in a magical forest,,
140
- A brave knight discovered a mysterious crystal,,
141
- The crystal began to glow with incredible power"""
142
 
143
- gr.Examples([[example_txt]], [input_text])
144
-
145
- generate_btn.click(
146
- self.generate_video,
 
 
 
147
  inputs=[input_text],
148
- outputs=[output]
 
 
 
 
 
149
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
- demo.launch(debug=True)
 
 
 
11
  import time
12
  import gradio as gr
13
  from typing import Tuple, List
14
+ import numpy as np
15
 
16
+ # Configure logging with console output
 
 
 
17
  logging.basicConfig(
 
 
 
 
18
  level=logging.INFO,
19
+ format='[%(asctime)s] %(message)s',
20
  handlers=[
21
+ logging.FileHandler('app.log'),
22
+ logging.StreamHandler() # This will print to console
23
  ]
24
  )
25
  LOGGER = logging.getLogger(__name__)
 
27
  class Text2Video:
28
  def __init__(self) -> None:
29
  """Initialize the Text2Video class."""
30
+ LOGGER.info("Initializing Text2Video application...")
31
  self.herc = Hercai()
32
+ LOGGER.info("Hercai API initialized successfully")
33
 
34
  def get_image(self, img_prompt: str) -> str:
35
  """Generate an image based on the provided text prompt."""
36
  try:
37
+ LOGGER.info(f"🎨 Starting image generation for prompt: {img_prompt}")
38
+
39
+ # Enhanced prompt for better comic-style results
40
+ comic_style_prompt = (
41
+ f"{img_prompt}, comic book style, full scene composition, "
42
+ "vibrant colors, clear speech bubbles with text, "
43
+ "dramatic lighting, high contrast, detailed backgrounds, "
44
+ "comic book panel layout, professional illustration"
45
  )
46
+
47
+ LOGGER.info("📝 Enhanced prompt with comic style elements")
48
+ LOGGER.info(f"🔄 Sending request to Hercai API...")
49
+
50
+ image_result = self.herc.draw_image(
51
+ model="simurg",
52
+ prompt=comic_style_prompt,
53
+ negative_prompt="blurry, cropped, low quality, dark, gloomy"
54
  )
55
+
56
+ image_url = image_result["url"]
57
+ LOGGER.info(f"✅ Image generated successfully: {image_url}")
58
+ return image_url
59
 
60
  except Exception as e:
61
+ LOGGER.error(f"❌ Error generating image: {str(e)}")
62
  raise
63
 
64
  def download_img_from_url(self, image_url: str, image_path: str) -> str:
65
  """Download and process image from URL."""
66
  try:
67
+ LOGGER.info(f"📥 Downloading image from: {image_url}")
68
+
69
+ # Download image
70
  urllib.request.urlretrieve(image_url, image_path)
71
 
72
+ # Process image to ensure full coverage
73
  img = Image.open(image_path)
 
 
 
74
 
75
+ # Resize maintaining aspect ratio
76
+ target_size = (1024, 1024)
77
+ img.thumbnail(target_size, Image.Resampling.LANCZOS)
78
+
79
+ # Create new image with padding if needed
80
+ new_img = Image.new('RGB', target_size, (255, 255, 255))
81
+ offset = ((target_size[0] - img.size[0]) // 2,
82
+ (target_size[1] - img.size[1]) // 2)
83
+ new_img.paste(img, offset)
84
+
85
+ # Save processed image
86
+ new_img.save(image_path, quality=95)
87
+
88
+ LOGGER.info(f"✅ Image processed and saved to: {image_path}")
89
  return image_path
90
 
91
  except Exception as e:
92
+ LOGGER.error(f"❌ Error processing image: {str(e)}")
93
  raise
94
 
95
+ def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
96
+ """Convert text to speech with enhanced quality."""
97
  try:
98
+ LOGGER.info(f"🔊 Converting text to audio: {img_prompt}")
99
+
100
+ # Create audio with enhanced parameters
101
+ tts = gTTS(text=img_prompt, lang='en', slow=False)
102
+ LOGGER.info("📝 Audio conversion complete")
103
+
104
+ # Save audio file
105
+ tts.save(audio_path)
106
+ LOGGER.info(f"✅ Audio saved to: {audio_path}")
107
+
108
+ return audio_path
109
+
110
+ except Exception as e:
111
+ LOGGER.error(f"❌ Error in audio conversion: {str(e)}")
112
+ raise
113
+
114
+ def get_images_and_audio(self, list_prompts: List[str]) -> Tuple[List[str], List[str]]:
115
+ """Process multiple prompts to generate images and audio."""
116
+ img_list = []
117
+ audio_paths = []
118
+
119
+ LOGGER.info(f"🎬 Starting batch processing of {len(list_prompts)} prompts")
120
+
121
+ for idx, img_prompt in enumerate(list_prompts, 1):
122
+ try:
123
+ LOGGER.info(f"📝 Processing prompt {idx}/{len(list_prompts)}")
124
+
125
+ # Generate unique identifier
126
+ unique_id = uuid.uuid4().hex[:8]
127
+
128
+ # Process image
129
+ image_path = f"scene_{idx}_{unique_id}.png"
130
+ img_url = self.get_image(img_prompt)
131
+ image = self.download_img_from_url(img_url, image_path)
132
+ img_list.append(image)
133
+
134
+ # Process audio
135
+ audio_path = f"audio_{idx}_{unique_id}.mp3"
136
+ audio = self.text_to_audio(img_prompt, audio_path)
137
+ audio_paths.append(audio)
138
+
139
+ LOGGER.info(f"✅ Completed processing prompt {idx}")
140
+
141
+ except Exception as e:
142
+ LOGGER.error(f"❌ Error processing prompt {idx}: {str(e)}")
143
+ raise
144
+
145
+ return img_list, audio_paths
146
 
147
+ def create_video_from_images_and_audio(self, image_files: List[str],
148
+ audio_files: List[str],
149
+ output_path: str) -> None:
150
+ """Create final video with enhanced quality."""
151
+ try:
152
+ LOGGER.info("🎥 Starting video creation process")
153
+
154
+ if len(image_files) != len(audio_files):
155
+ raise ValueError("Number of images and audio files don't match")
156
+
157
  video_clips = []
158
+ for idx, (image_file, audio_file) in enumerate(zip(image_files, audio_files), 1):
159
+ LOGGER.info(f"🔄 Processing scene {idx}/{len(image_files)}")
160
+
161
+ # Load audio and create video clip
162
  audio_clip = mp.AudioFileClip(audio_file)
163
  video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
164
  video_clip = video_clip.set_audio(audio_clip)
165
  video_clips.append(video_clip)
166
+
167
+ LOGGER.info(f"✅ Scene {idx} processed successfully")
168
+
169
+ LOGGER.info("🔄 Concatenating all scenes")
170
  final_clip = mp.concatenate_videoclips(video_clips)
171
 
172
+ LOGGER.info("💾 Writing final video file")
173
  final_clip.write_videofile(
174
  output_path,
175
  codec='libx264',
176
  fps=24,
177
  audio_codec='aac',
178
  audio_bitrate='192k',
179
+ preset='medium'
 
180
  )
181
 
182
+ LOGGER.info("✅ Video created successfully")
183
 
184
  except Exception as e:
185
+ LOGGER.error(f"❌ Error in video creation: {str(e)}")
186
+ raise
187
+
188
+ def generate_video(self, text: str) -> str:
189
+ """Main function to generate video from text."""
190
+ try:
191
+ LOGGER.info("🎬 Starting video generation process")
192
+
193
+ # Split text into prompts
194
+ list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
195
+ LOGGER.info(f"📝 Processed {len(list_prompts)} scenes from input text")
196
+
197
+ output_path = f"comic_video_{uuid.uuid4().hex[:8]}.mp4"
198
+
199
+ # Generate images and audio
200
+ img_list, audio_paths = self.get_images_and_audio(list_prompts)
201
+
202
+ # Create final video
203
+ self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
204
+
205
+ LOGGER.info(f"✅ Video generation completed: {output_path}")
206
+ return output_path
207
+
208
+ except Exception as e:
209
+ LOGGER.error(f"❌ Error in video generation: {str(e)}")
210
  raise
211
 
 
212
 
213
  def gradio_interface(self):
214
+ """Create Gradio interface."""
215
+ LOGGER.info("🌐 Initializing Gradio interface")
216
+
217
+ # Example stories of different lengths
218
+ short_story = """A little bird found a shiny seed,,
219
+ The bird planted it in the garden,,
220
+ The next day, a magical flower grew,,
221
+ The bird and flower became best friends"""
222
+
223
+ medium_story = """In a bustling city, a lonely robot worked in a toy shop,,
224
+ Every night, the robot would fix broken toys while everyone slept,,
225
+ One day, a little girl noticed the robot's kindness,,
226
+ She started leaving thank you notes for the robot,,
227
+ The robot began making special toys just for her,,
228
+ Soon, other children discovered the robot's magical toys,,
229
+ The toy shop became famous for its special toys,,
230
+ The robot wasn't lonely anymore, surrounded by happy children"""
231
+
232
+ long_story = """Deep in the enchanted forest, there lived a young wizard named Leo who couldn't control his magic,,
233
+ Every time Leo tried to cast a spell, unexpected things would happen - flowers turned into butterflies, rain became candy, and his cat could suddenly speak French,,
234
+ One day, Leo discovered an ancient book hidden beneath a talking tree,,
235
+ The book revealed that his 'mistakes' were actually rare gifts - the ability to bring joy and wonder to the world,,
236
+ Excited about his discovery, Leo started practicing his unique magic in secret,,
237
+ He created a garden where flowers sang lullabies,,
238
+ He made clouds that rained rainbow bubbles,,
239
+ He transformed ordinary objects into extraordinary treasures,,
240
+ Word spread about the magical happenings in the forest,,
241
+ Children from nearby villages would visit to see Leo's wonderful creations,,
242
+ Adults who had forgotten about magic began to believe again,,
243
+ The forest became a place of wonder and happiness,,
244
+ Leo realized that sometimes the best magic comes from embracing what makes you different,,
245
+ And so, the once-frustrated wizard became known as Leo the Wonderful, master of joyful surprises"""
246
+
247
  with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
248
  gr.HTML("""
249
+ <center><h1 style="color:#fff">Comic Video Generator</h1></center>
250
+ <center><p style="color:#ccc">Create animated comics from your stories! Separate scenes with double commas (,,)</p></center>
 
 
251
  """)
252
+
253
  with gr.Row():
254
  input_text = gr.Textbox(
255
+ label="Comic Script",
256
  placeholder="Enter your story (separate scenes with ,,)",
257
  lines=5
258
  )
259
+
260
  with gr.Row():
261
+ generate_btn = gr.Button("🎬 Generate Video", variant="primary")
262
+
263
  with gr.Row():
264
  output = gr.Video(label="Generated Comic Video")
 
 
 
 
265
 
266
+ # Examples section with three stories
267
+ gr.Examples(
268
+ examples=[
269
+ [short_story],
270
+ [medium_story],
271
+ [long_story]
272
+ ],
273
  inputs=[input_text],
274
+ label="Story Examples",
275
+ headers=[
276
+ "Short Story (4 scenes)",
277
+ "Medium Story (8 scenes)",
278
+ "Long Story (14 scenes)"
279
+ ]
280
  )
281
+
282
+ # Add some helpful tips
283
+ gr.HTML("""
284
+ <div style="color:#ccc; padding: 20px; margin-top: 20px; background: rgba(0,0,0,0.1); border-radius: 10px;">
285
+ <h3>Tips for Better Results:</h3>
286
+ <ul>
287
+ <li>Separate each scene with double commas (,,)</li>
288
+ <li>Keep each scene description clear and vivid</li>
289
+ <li>Include character emotions and actions</li>
290
+ <li>Describe the setting and atmosphere</li>
291
+ <li>Aim for 3-15 scenes for best results</li>
292
+ </ul>
293
+ </div>
294
+ """)
295
+
296
+ generate_btn.click(self.generate_video, inputs=[input_text], outputs=[output])
297
+
298
+ LOGGER.info("✅ Gradio interface initialized")
299
+ demo.launch(debug=True)
300
 
301
+ if __name__ == "__main__":
302
+ text2video = Text2Video()
303
+ text2video.gradio_interface()