Update app.py
app.py
CHANGED
@@ -73,7 +73,7 @@ import torch
 from diffusers import AnimateDiffPipeline, LCMScheduler, MotionAdapter
 from diffusers.utils import export_to_gif
 import re
-def text2vid(input_text):
+def text2vid(input_text, desc="4k, high resolution"):
     # Split the input text with a regular expression and extract the sentences
     sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
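The change to text2vid is the new desc keyword, which lets callers append their own style tags to every prompt instead of relying on a hardcoded suffix. A minimal usage sketch (the storyboard string is invented; the regex expects numbered lines of the form "[n] ..."):

storyboard = "[1] A sailboat drifts across a calm bay at dawn\n[2] Gulls circle the mast"

frames = text2vid(storyboard)                                 # default "4k, high resolution" styling
frames = text2vid(storyboard, desc="watercolor, soft light")  # per-call style override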
@@ -95,11 +95,10 @@ def text2vid(input_text):
 
     all_frames = []  # stores every frame from every sentence
 
-    # Loop over each sentence, generate the animation, and export it as a GIF
     for index, sentence in enumerate(sentences):
         output = pipe(
-            #prompt=sentence + ", 4k, high resolution",
-            prompt=sentence + ", 4k, high resolution",
+            #prompt=sentence + ", " + desc,
+            prompt=sentence + ", " + desc,
             negative_prompt="bad quality, worse quality, low resolution",
             num_frames=24,
             guidance_scale=2.0,
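For reference, the sentence-splitting regex captures the text after each [n] marker, one entry per line. A standalone check (the sample text is invented for illustration):

import re

sample = "[1] A cat naps on a windowsill\n[2] Rain streaks the glass"
print(re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', sample))
# -> ['A cat naps on a windowsill', 'Rain streaks the glass']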
@@ -111,7 +110,39 @@ def text2vid(input_text):
 
     return all_frames
 
+def text2vid_pro(input_text, desc="4k, high resolution"):
+    # Split the input text with a regular expression and extract the sentences
+    sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
+
+    # Load the motion adapter and the AnimateDiff pipeline
+    adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", config_file="wangfuyun/AnimateLCM/config.json", torch_dtype=torch.float16)
+    pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
+    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
+
+    # Load the LoRA weights
+    pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
+
+    # Set the adapter and enable the memory optimizations
+    try:
+        pipe.set_adapters(["lcm-lora"], [0.8])
+    except ValueError as e:
+        print("Ignoring the error:", str(e))
+    pipe.enable_vae_slicing()
+    pipe.enable_model_cpu_offload()
 
+    # Loop over each sentence, generate the animation, and export it as a GIF
+    for index, sentence in enumerate(sentences):
+        output = pipe(
+            #prompt=sentence + "," + desc,
+            prompt=sentence + ", cartoon",
+            negative_prompt="bad quality, worse quality, low resolution",
+            num_frames=24,
+            guidance_scale=2.0,
+            num_inference_steps=6,
+            generator=torch.Generator("cpu").manual_seed(0)
+        )
+        frames = output.frames[0]
+        export_to_gif(frames, f"{index+1}.gif")
 
 
 def text2text_A(user_input):
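Two details of text2vid_pro are worth noting: it returns nothing, so its only output is the side effect of writing 1.gif, 2.gif, ... into the working directory (which video_generate_pro, added further down, later collects with a glob), and the desc argument is currently ignored because the line that concatenates it is commented out in favor of a hardcoded ", cartoon" suffix. A usage sketch under those assumptions (the storyboard string is invented):

storyboard = "[1] A rocket lifts off\n[2] The booster separates above the clouds"
text2vid_pro(storyboard)  # writes 1.gif, 2.gif, ... as a side effect; desc has no effect yet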
@@ -156,6 +187,42 @@ def text2audio(text_input, duration_seconds):
     print(duration_seconds)
     return audio_values[0, 0].numpy(), model.config.audio_encoder.sampling_rate
 
+
+def text2audio_pro(text_input, duration_seconds):
+    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+    inputs = processor(text=[text_input], padding=True, return_tensors="pt")
+
+    # Calculate max_new_tokens based on the desired duration
+    max_new_tokens = int((duration_seconds / 5) * 256)
+
+    audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+    # Save the audio file
+    scipy.io.wavfile.write("bgm.wav", rate=model.config.audio_encoder.sampling_rate, data=audio_values[0, 0].numpy())
+
+
+from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
+import glob
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
+import scipy.io.wavfile
+
+def video_generate_pro(img2text_input=" "):
+    # Set the video frame rate
+    frame_rate = 24  # change this value for a different frame rate
+
+    # Collect all GIF files, assumed to be in the same folder and sorted by name
+    gif_files = sorted(glob.glob('./*.gif'))
+
+    # Build the clip list, one VideoFileClip per GIF file
+    clips = [VideoFileClip(gif) for gif in gif_files]
+
+    # Concatenate the video clips
+    final_clip = concatenate_videoclips(clips, method="compose")
+
+    # Write the output video file
+    final_clip.write_videofile('output_video.mp4', codec='libx264')
+
 # Define the function that generates the final video
 def result_generate(video_clip, audio_clip):
     video = video_clip.set_audio(audio_clip)
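The token budget in text2audio_pro follows the rule of thumb that MusicGen emits roughly 256 tokens per 5 seconds of audio (its EnCodec tokenizer runs at about 50 steps per second, so the estimate slightly overshoots the requested duration). A worked example:

duration_seconds = 15
max_new_tokens = int((duration_seconds / 5) * 256)  # 3 * 256 = 768 tokens for roughly 15 s

Also note that frame_rate in video_generate_pro is assigned but never used; for it to take effect, write_videofile would need an explicit fps=frame_rate argument.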
@@ -164,13 +231,27 @@ def result_generate(video_clip, audio_clip):
     video_buffer.seek(0)
     return video_buffer
 
-def generate_video(image):
+from moviepy.editor import VideoFileClip, AudioFileClip
+def result_generate_pro():
+    # Load the video file
+    video = VideoFileClip("output_video.mp4")
+
+    # Load the audio file
+    audio = AudioFileClip("bgm.wav")
+
+    # Set the audio as the video's audio track
+    video = video.set_audio(audio)
+
+    # Export the new video file
+    video.write_videofile("result.mp4", codec="libx264", audio_codec="aac")
+
+def generate_video_basic(image, desc):
     # Get the image description
     text = img2text(image)
     # Generate a detailed text description of the scene
     sentences = text2text(text)
     # Generate the video frames
-    video_frames = text2vid(sentences)
+    video_frames = text2vid(sentences, desc)
 
     # Convert the video frames to numpy arrays
     video_frames = [np.array(frame) for frame in video_frames]
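result_generate_pro communicates entirely through fixed filenames: it expects output_video.mp4 and bgm.wav on disk and writes result.mp4. The same muxing step with explicit paths, as a hypothetical helper that is not part of this commit:

from moviepy.editor import VideoFileClip, AudioFileClip

def mux_audio(video_path, audio_path, out_path):
    # Attach an audio track to a video and re-encode it (hypothetical helper).
    clip = VideoFileClip(video_path).set_audio(AudioFileClip(audio_path))
    clip.write_videofile(out_path, codec="libx264", audio_codec="aac")

mux_audio("output_video.mp4", "bgm.wav", "result.mp4")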
@@ -199,7 +280,22 @@ def generate_video(image):
 
     # The function now returns the path of the video file; reading and deleting the data is no longer needed
     return video_file_path
-
+
+def generate_video_pro(image, desc):
+    # Get the image description
+    text = img2text(image)
+    sentences = text2text(text)  # generate structured sentences from the text
+    text2vid_pro(sentences, desc)  # create the video sequence from the sentences
+    video_generate_pro()  # create the video file
+    video = VideoFileClip("output_video.mp4")
+    duration = video.duration
+    print(duration)
+    audio_text = text2text_A(text)
+    text2audio_pro(audio_text, duration)
+    result_generate_pro()
+    return "result.mp4"
+
+
 import traceback
 
 def safe_generate_video(image):
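As written, generate_video_pro returns a single path, while the Pro tab below declares two outputs (a video and an error box); generate_video_basic has the same mismatch. A hypothetical wrapper in the style of the file's existing safe_generate_video that feeds both outputs:

def generate_video_pro_safe(image, desc):
    # Return a (video, error) pair so both declared Gradio outputs are filled (hypothetical).
    try:
        return generate_video_pro(image, desc), ""
    except Exception as e:
        return None, str(e)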
@@ -214,18 +310,38 @@ def safe_generate_video(image):
     return None, error_msg
 
 # Define the Gradio interface
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+with gr.Blocks() as demo:
+    gr.Markdown("Upload an image and provide a description to generate a video.")
+    with gr.Tab("Basic Version"):
+        with gr.Row():
+            image_input = gr.Image(type="pil")
+            description_input = gr.Textbox(label="Description", placeholder="Enter description here", lines=2)
+        with gr.Row():
+            video_output = gr.Video(label="Generated Video")
+            error_output = gr.Textbox(label="Error Messages", placeholder="No errors", lines=5)
+        gr.Interface(
+            fn=generate_video_basic,
+            inputs=[image_input, description_input],
+            outputs=[video_output, error_output],
+            title="Basic Version Video Generation",
+            description="Upload an image and some descriptions to generate a video in the basic version. Any errors will be displayed below.",
+            theme="soft"
+        )
+    with gr.Tab("Pro Version"):
+        with gr.Row():
+            image_input = gr.Image(type="pil")
+            description_input = gr.Textbox(label="Description", placeholder="Enter description here", lines=2)
+        with gr.Row():
+            video_output = gr.Video(label="Generated Video")
+            error_output = gr.Textbox(label="Error Messages", placeholder="No errors", lines=5)
+        gr.Interface(
+            fn=generate_video_pro,
+            inputs=[image_input, description_input],
+            outputs=[video_output, error_output],
+            title="Pro Version Video Generation",
+            description="Upload an image and some descriptions to generate a video in the pro version. Any errors will be displayed below.",
+            theme="soft"
+        )
+
+demo.launch()
 
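Instantiating a full gr.Interface inside a gr.Blocks tab, on top of components that were already laid out manually, mixes Gradio's two APIs in an unusual way; the more common Blocks idiom is to wire the components to a button. A sketch of the Basic tab in that style (same components as above, with an assumed gr.Button added):

with gr.Tab("Basic Version"):
    with gr.Row():
        image_input = gr.Image(type="pil")
        description_input = gr.Textbox(label="Description", lines=2)
    with gr.Row():
        video_output = gr.Video(label="Generated Video")
        error_output = gr.Textbox(label="Error Messages", lines=5)
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=generate_video_basic,
        inputs=[image_input, description_input],
        outputs=[video_output, error_output],
    )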