HongcanGuo committed on
Commit
cb70d57
·
verified ·
1 Parent(s): 5b17412

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -21
app.py CHANGED
@@ -73,7 +73,7 @@ import torch
73
  from diffusers import AnimateDiffPipeline, LCMScheduler, MotionAdapter
74
  from diffusers.utils import export_to_gif
75
  import re
76
- def text2vid(input_text):
77
  # 使用正则表达式分割输入文本并提取句子
78
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
79
 
@@ -95,11 +95,10 @@ def text2vid(input_text):
95
 
96
  all_frames = [] # 存储所有句子的所有帧
97
 
98
- # 循环遍历每个句子,生成动画并导出为GIF
99
  for index, sentence in enumerate(sentences):
100
  output = pipe(
101
- #prompt=sentence + ", 4k, high resolution",
102
- prompt=sentence + ", cartoon",
103
  negative_prompt="bad quality, worse quality, low resolution",
104
  num_frames=24,
105
  guidance_scale=2.0,
@@ -111,7 +110,39 @@ def text2vid(input_text):
111
 
112
  return all_frames
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
 
117
  def text2text_A(user_input):
@@ -156,6 +187,42 @@ def text2audio(text_input, duration_seconds):
156
  print(duration_seconds)
157
  return audio_values[0, 0].numpy(), model.config.audio_encoder.sampling_rate
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # 定义生成结果视频的函数
160
  def result_generate(video_clip, audio_clip):
161
  video = video_clip.set_audio(audio_clip)
@@ -164,13 +231,27 @@ def result_generate(video_clip, audio_clip):
164
  video_buffer.seek(0)
165
  return video_buffer
166
 
167
- def generate_video(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  # 获取图像描述
169
  text = img2text(image)
170
  # 生成详细的文本场景描述
171
  sentences = text2text(text)
172
  # 生成视频帧
173
- video_frames = text2vid(sentences)
174
 
175
  # 转换视频帧为numpy数组
176
  video_frames = [np.array(frame) for frame in video_frames]
@@ -199,7 +280,22 @@ def generate_video(image):
199
 
200
  # 函数现在返回视频文件的路径,不再需要读取数据并删除
201
  return video_file_path
202
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  import traceback
204
 
205
  def safe_generate_video(image):
@@ -214,18 +310,38 @@ def safe_generate_video(image):
214
  return None, error_msg
215
 
216
  # 定义 Gradio 接口
217
- interface = gr.Interface(
218
- fn=lambda img: safe_generate_video(img.convert('RGB')), # 确保输入是RGB格式的图片
219
- inputs=gr.Image(type="pil"),
220
- outputs=[
221
- gr.Video(label="Generated Video"),
222
- gr.Textbox(label="Error Messages", placeholder="No errors", lines=5)
223
- ],
224
- title="InspiroV Video Generation",
225
- description="Upload an image to generate a video. Any errors will be displayed below.",
226
- theme="soft"
227
- )
228
-
229
- # 启动 Gradio 应用
230
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
 
73
  from diffusers import AnimateDiffPipeline, LCMScheduler, MotionAdapter
74
  from diffusers.utils import export_to_gif
75
  import re
76
+ def text2vid(input_text,desc = "4k, high resolution"):
77
  # 使用正则表达式分割输入文本并提取句子
78
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
79
 
 
95
 
96
  all_frames = [] # 存储所有句子的所有帧
97
 
 
98
  for index, sentence in enumerate(sentences):
99
  output = pipe(
100
+ #prompt=sentence + ", " + desc,
101
+ prompt=sentence + ", " + desc,
102
  negative_prompt="bad quality, worse quality, low resolution",
103
  num_frames=24,
104
  guidance_scale=2.0,
 
110
 
111
  return all_frames
112
 
113
def text2vid_pro(input_text, desc="4k, high resolution"):
    """Render one animated GIF per numbered sentence found in ``input_text``.

    Sentences are the text following each ``[n] `` marker, up to the end of
    that line. For the i-th sentence (1-based) a 24-frame animation is
    generated and written to ``"{i}.gif"`` in the current working directory.

    Args:
        input_text: Text containing lines of the form ``[1] some sentence``.
        desc: Accepted for signature parity with ``text2vid``. The live
            prompt appends ``", cartoon"`` instead, so this value is
            currently not used.

    Returns:
        None. The GIF files on disk are the only output.
    """
    # Split the input with a regular expression and extract the sentences.
    sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
    if not sentences:
        # Nothing to render: skip loading the adapter, pipeline and LoRA
        # weights, since the generation loop below would not run anyway.
        return None

    # Load the motion adapter and the animation diffusion pipeline.
    adapter = MotionAdapter.from_pretrained(
        "wangfuyun/AnimateLCM",
        config_file="wangfuyun/AnimateLCM/config.json",
        torch_dtype=torch.float16,
    )
    pipe = AnimateDiffPipeline.from_pretrained(
        "emilianJR/epiCRealism",
        motion_adapter=adapter,
        torch_dtype=torch.float16,
    )
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")

    # Load the LoRA weights.
    pipe.load_lora_weights(
        "wangfuyun/AnimateLCM",
        weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
        adapter_name="lcm-lora",
    )

    # Activate the adapter; a ValueError here is reported and ignored on
    # purpose so generation still proceeds.
    try:
        pipe.set_adapters(["lcm-lora"], [0.8])
    except ValueError as e:
        print("Ignoring the error:", str(e))
    pipe.enable_vae_slicing()
    pipe.enable_model_cpu_offload()

    # Generate an animation for every sentence and export it as a GIF.
    for index, sentence in enumerate(sentences):
        output = pipe(
            prompt=sentence + ", cartoon",
            negative_prompt="bad quality, worse quality, low resolution",
            num_frames=24,
            guidance_scale=2.0,
            num_inference_steps=6,
            # A fresh generator with the same seed is created for each sentence.
            generator=torch.Generator("cpu").manual_seed(0),
        )
        frames = output.frames[0]
        export_to_gif(frames, f"{index+1}.gif")
    return None
146
 
147
 
148
  def text2text_A(user_input):
 
187
  print(duration_seconds)
188
  return audio_values[0, 0].numpy(), model.config.audio_encoder.sampling_rate
189
 
190
+
191
def text2audio_pro(text_input, duration_seconds):
    """Generate background music for ``text_input`` and save it to ``bgm.wav``.

    Args:
        text_input: Text prompt passed to the MusicGen processor.
        duration_seconds: Desired audio length in seconds; must be positive.

    Returns:
        None. The audio is written to ``"bgm.wav"`` in the current directory.

    Raises:
        ValueError: If ``duration_seconds`` is not positive. This is checked
            before the model is loaded so the failure is immediate.
    """
    if duration_seconds <= 0:
        raise ValueError(
            f"duration_seconds must be positive, got {duration_seconds!r}"
        )

    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    inputs = processor(text=[text_input], padding=True, return_tensors="pt")

    # Token budget derived from the desired duration (256 tokens per 5 s).
    # Clamp to one token so very short durations do not truncate to zero.
    max_new_tokens = max(1, int((duration_seconds / 5) * 256))

    audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Save the first generated waveform at the model's sampling rate.
    scipy.io.wavfile.write(
        "bgm.wav",
        rate=model.config.audio_encoder.sampling_rate,
        data=audio_values[0, 0].numpy(),
    )
203
+
204
+
205
+ from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
206
+ import glob
207
+ from transformers import AutoProcessor, MusicgenForConditionalGeneration
208
+ import scipy.io.wavfile
209
+
210
def video_generate_pro(img2text_input=" "):
    """Concatenate the GIFs in the current directory into ``output_video.mp4``.

    GIFs whose file name is a plain number (``1.gif``, ``2.gif``, ...,
    ``10.gif``) are ordered numerically, so scene 10 follows scene 9 instead
    of sorting between ``1.gif`` and ``2.gif``. Any other GIFs come after
    them in alphabetical order.

    Args:
        img2text_input: Unused; kept so existing callers keep working.

    Returns:
        None. The result is written to ``"output_video.mp4"``.

    Raises:
        FileNotFoundError: If the current directory contains no ``.gif`` file.
    """
    import os

    def _scene_order(path):
        # Numbered scenes first, by numeric value; everything else by name.
        stem = os.path.splitext(os.path.basename(path))[0]
        if stem.isdecimal():
            return (0, int(stem), stem)
        return (1, 0, stem)

    # Collect every GIF in the working directory in scene order.
    gif_files = sorted(glob.glob('./*.gif'), key=_scene_order)
    if not gif_files:
        raise FileNotFoundError(
            "No GIF files found in the current directory to assemble into a video."
        )

    clips = []
    final_clip = None
    try:
        # Open each GIF as its own clip, then join them end to end.
        for gif in gif_files:
            clips.append(VideoFileClip(gif))
        final_clip = concatenate_videoclips(clips, method="compose")

        # Write the combined video file.
        final_clip.write_videofile('output_video.mp4', codec='libx264')
    finally:
        # Release the readers even if opening or encoding failed part-way.
        if final_clip is not None:
            final_clip.close()
        for clip in clips:
            clip.close()
225
+
226
  # 定义生成结果视频的函数
227
  def result_generate(video_clip, audio_clip):
228
  video = video_clip.set_audio(audio_clip)
 
231
  video_buffer.seek(0)
232
  return video_buffer
233
 
234
+ from moviepy.editor import VideoFileClip, AudioFileClip
235
def result_generate_pro():
    """Mux ``bgm.wav`` onto ``output_video.mp4`` and write ``result.mp4``.

    Both input files are read from the current working directory. The clips
    are closed when the function exits, including when encoding fails, so
    their file readers are not left open.

    Returns:
        None. The result is written to ``"result.mp4"``.
    """
    # Load the video file.
    video = VideoFileClip("output_video.mp4")
    try:
        # Load the audio file.
        audio = AudioFileClip("bgm.wav")
        try:
            # Use the audio as the video's soundtrack.
            scored_video = video.set_audio(audio)

            # Export the new video file.
            scored_video.write_videofile("result.mp4", codec="libx264", audio_codec="aac")
        finally:
            audio.close()
    finally:
        video.close()
247
+
248
+ def generate_video_basic(image,desc):
249
  # 获取图像描述
250
  text = img2text(image)
251
  # 生成详细的文本场景描述
252
  sentences = text2text(text)
253
  # 生成视频帧
254
+ video_frames = text2vid(sentences,desc)
255
 
256
  # 转换视频帧为numpy数组
257
  video_frames = [np.array(frame) for frame in video_frames]
 
280
 
281
  # 函数现在返回视频文件的路径,不再需要读取数据并删除
282
  return video_file_path
283
+
284
def generate_video_pro(image, desc):
    """Run the pro pipeline for ``image`` and return the result video path.

    Steps, in order: caption the image, expand the caption into numbered
    sentences, render one GIF per sentence, join the GIFs into
    ``output_video.mp4``, generate background music matching that video's
    duration, and mux both into ``result.mp4``.

    Args:
        image: Input image, passed unchanged to ``img2text``.
        desc: Description text, forwarded to ``text2vid_pro``.

    Returns:
        The string ``"result.mp4"``.
    """
    # Get the image description.
    text = img2text(image)
    sentences = text2text(text)      # build structured sentences from the caption
    text2vid_pro(sentences, desc)    # render the per-sentence GIF sequence
    video_generate_pro()             # assemble the GIFs into a video file

    # Only the duration is needed here, so release the reader right away.
    video = VideoFileClip("output_video.mp4")
    try:
        duration = video.duration
    finally:
        video.close()
    print(duration)

    audio_text = text2text_A(text)
    text2audio_pro(audio_text, duration)
    result_generate_pro()
    return "result.mp4"
297
+
298
+
299
  import traceback
300
 
301
  def safe_generate_video(image):
 
310
  return None, error_msg
311
 
312
  # 定义 Gradio 接口
313
def _with_error_report(generate_fn):
    """Adapt ``generate_fn(image, desc)`` to the UI's two outputs.

    The returned callable yields ``(video_path, "")`` on success and
    ``(None, traceback_text)`` on failure, so the "Error Messages" box is
    populated instead of the request failing.
    """
    import traceback

    def run(image, desc):
        if image is None:
            return None, "Please upload an image first."
        try:
            # Make sure the input image is in RGB format before processing.
            return generate_fn(image.convert("RGB"), desc), ""
        except Exception:
            # UI boundary: report the failure to the user rather than raising.
            return None, traceback.format_exc()

    return run


def _build_tab(tab_label, title, description, generate_fn):
    """Create one tab with its inputs, outputs and a button bound to ``generate_fn``.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Tab(tab_label):
        gr.Markdown(f"## {title}\n{description}")
        with gr.Row():
            image_input = gr.Image(type="pil")
            description_input = gr.Textbox(label="Description", placeholder="Enter description here", lines=2)
        run_button = gr.Button("Generate Video")
        with gr.Row():
            video_output = gr.Video(label="Generated Video")
            error_output = gr.Textbox(label="Error Messages", placeholder="No errors", lines=5)
        # Components are already rendered above, so wire them with an event
        # listener instead of nesting a gr.Interface that would render them again.
        run_button.click(
            fn=_with_error_report(generate_fn),
            inputs=[image_input, description_input],
            outputs=[video_output, error_output],
        )


# Define the Gradio interface.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("Upload an image and provide a description to generate a video.")
    _build_tab(
        "Basic Version",
        "Basic Version Video Generation",
        "Upload an image and some descriptions to generate a video in the basic version. Any errors will be displayed below.",
        generate_video_basic,
    )
    _build_tab(
        "Pro Version",
        "Pro Version Video Generation",
        "Upload an image and some descriptions to generate a video in the pro version. Any errors will be displayed below.",
        generate_video_pro,
    )

# Launch the Gradio app.
demo.launch()
347