HongcanGuo committed on
Commit
90f84f0
·
verified ·
1 Parent(s): e268f1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -32
app.py CHANGED
@@ -11,6 +11,7 @@ import scipy.io.wavfile
11
  import re
12
  import glob
13
  import os
 
14
 
15
  # 定义图像到文本函数
16
  def img2text(image):
@@ -62,7 +63,6 @@ def text2text(user_input):
62
  completion = response.json()
63
  return completion['choices'][0]['message']['content']
64
 
65
- # 定义文本到视频函数
66
  def text2vid(input_text):
67
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
68
  adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", config_file="wangfuyun/AnimateLCM/config.json", torch_dtype=torch.float16)
@@ -75,7 +75,8 @@ def text2vid(input_text):
75
  print("Ignoring the error:", str(e))
76
  pipe.enable_vae_slicing()
77
  pipe.enable_model_cpu_offload()
78
- for index, sentence in enumerate(sentences):
 
79
  output = pipe(
80
  prompt=sentence + ", 4k, high resolution",
81
  negative_prompt="bad quality, worse quality, low resolution",
@@ -85,7 +86,10 @@ def text2vid(input_text):
85
  generator=torch.Generator("cpu").manual_seed(0)
86
  )
87
  frames = output.frames[0]
88
- export_to_gif(frames, f"./{index+1}.gif")
 
 
 
89
 
90
  # 定义生成最终视频的函数
91
  def video_generate():
@@ -95,49 +99,35 @@ def video_generate():
95
  final_clip = concatenate_videoclips(clips, method="compose")
96
  final_clip.write_videofile('output_video.mp4', codec='libx264')
97
 
98
- # 定义文本到音频函数
99
def text2audio(text_input, duration_seconds):
    """Generate background music for *text_input* and save it as bgm.wav.

    The generation budget is scaled from the requested duration using the
    code's own formula: int((duration_seconds / 5) * 256) new tokens.
    The WAV is written at the model's own encoder sampling rate.
    """
    checkpoint = "facebook/musicgen-small"
    processor = AutoProcessor.from_pretrained(checkpoint)
    model = MusicgenForConditionalGeneration.from_pretrained(checkpoint)
    encoded = processor(text=[text_input], padding=True, return_tensors="pt")
    token_budget = int((duration_seconds / 5) * 256)
    generated = model.generate(**encoded, max_new_tokens=token_budget)
    sample_rate = model.config.audio_encoder.sampling_rate
    scipy.io.wavfile.write("bgm.wav", rate=sample_rate, data=generated[0, 0].numpy())
 
 
106
 
107
- # 定义生成结果视频的函数
108
def result_generate():
    """Mux the generated video with the generated background music.

    Reads output_video.mp4 and bgm.wav from the working directory and
    writes the combined clip to result.mp4 (H.264 video, AAC audio).
    """
    silent_clip = VideoFileClip("output_video.mp4")
    bgm_track = AudioFileClip("bgm.wav")
    final_clip = silent_clip.set_audio(bgm_track)
    final_clip.write_videofile("result.mp4", codec="libx264", audio_codec="aac")
113
 
114
- # 定义删除所有文件的函数
115
def delete_all_files(directory):
    """Remove every file and empty subdirectory directly inside *directory*.

    Non-empty subdirectories are not descended into: os.rmdir only succeeds
    on empty ones, and the resulting OSError is caught below.  Each failure
    is logged and skipped so one bad entry does not abort the cleanup.

    Args:
        directory: Path of the directory whose contents are deleted.

    Fix: the f-strings here had lost their placeholders (they printed the
    literal text "(unknown)"); the deleted path is restored in each message.
    """
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        try:
            if os.path.isfile(file_path):
                os.remove(file_path)
                print(f"Deleted {file_path}")
            elif os.path.isdir(file_path):
                os.rmdir(file_path)
                print(f"Deleted empty directory {file_path}")
        except Exception as e:
            # Best-effort cleanup: report and continue with the next entry.
            print(f"Failed to delete {file_path}. Reason: {e}")
127
 
128
- # 整合所有步骤到主函数
129
def generate_video(image):
    """File-based image-to-video pipeline: caption, script, clips, music, mux.

    Intermediate artifacts are exchanged through files in the working
    directory (output_video.mp4, bgm.wav); the finished file path is
    returned for the caller.
    """
    # delete_all_files("data")  # pre-run cleanup, disabled in the original
    caption = img2text(image)
    script = text2text(caption)
    text2vid(script)
    video_generate()
    duration = VideoFileClip("output_video.mp4").duration
    music_prompt = text2text(caption)
    text2audio(music_prompt, duration)
    result_generate()
    return "result.mp4"
141
 
142
  # 定义 Gradio 接口
143
  # interface = gr.Interface(
 
11
  import re
12
  import glob
13
  import os
14
+ from io import BytesIO
15
 
16
  # 定义图像到文本函数
17
  def img2text(image):
 
63
  completion = response.json()
64
  return completion['choices'][0]['message']['content']
65
 
 
66
  def text2vid(input_text):
67
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
68
  adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", config_file="wangfuyun/AnimateLCM/config.json", torch_dtype=torch.float16)
 
75
  print("Ignoring the error:", str(e))
76
  pipe.enable_vae_slicing()
77
  pipe.enable_model_cpu_offload()
78
+ video_clips = []
79
+ for sentence in sentences:
80
  output = pipe(
81
  prompt=sentence + ", 4k, high resolution",
82
  negative_prompt="bad quality, worse quality, low resolution",
 
86
  generator=torch.Generator("cpu").manual_seed(0)
87
  )
88
  frames = output.frames[0]
89
+ video_clip = frames_to_video_clip(frames)
90
+ video_clips.append(video_clip)
91
+ final_clip = concatenate_videoclips(video_clips, method="compose")
92
+ return final_clip
93
 
94
  # 定义生成最终视频的函数
95
  def video_generate():
 
99
  final_clip = concatenate_videoclips(clips, method="compose")
100
  final_clip.write_videofile('output_video.mp4', codec='libx264')
101
 
102
+ # 修改音频生成函数
103
def text2audio(text_input, duration_seconds):
    """Generate background music for *text_input* and return it as an audio clip.

    The generation budget follows the code's own formula:
    int((duration_seconds / 5) * 256) new tokens.  The raw samples are
    wrapped by numpy_array_to_audio_clip — presumably a helper defined
    elsewhere in this file; it is not visible in this diff.
    """
    checkpoint = "facebook/musicgen-small"
    processor = AutoProcessor.from_pretrained(checkpoint)
    model = MusicgenForConditionalGeneration.from_pretrained(checkpoint)
    encoded = processor(text=[text_input], padding=True, return_tensors="pt")
    token_budget = int((duration_seconds / 5) * 256)
    samples = model.generate(**encoded, max_new_tokens=token_budget)
    sample_rate = model.config.audio_encoder.sampling_rate
    return numpy_array_to_audio_clip(samples[0, 0].numpy(), rate=sample_rate)
112
 
113
+ # 修改最终视频生成函数
114
def result_generate(video_clip, audio_clip):
    """Attach *audio_clip* to *video_clip* and return the encoded bytes.

    Serialization goes through video_clip_to_bytes — presumably a helper
    defined elsewhere in this file; it is not visible in this diff.
    """
    combined = video_clip.set_audio(audio_clip)
    return video_clip_to_bytes(combined)
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
+ # 主函数,结合上述修改
121
def generate_video(image):
    """In-memory image-to-video pipeline: caption, script, clips, music, mux.

    Args:
        image: Input image handed to the captioning model (img2text).

    Returns:
        The encoded result video bytes produced by result_generate.

    Fix: text2vid now returns a moviepy clip object (its new body ends with
    `return final_clip`), but this function passed that object to
    VideoFileClip(), which expects a file path and would raise at runtime.
    The duration is now read directly from the returned clip.
    """
    text = img2text(image)
    sentences = text2text(text)
    final_video_clip = text2vid(sentences)
    # text2vid returns an in-memory clip; query its duration directly
    # instead of wrapping it in VideoFileClip (which takes a filename).
    duration = final_video_clip.duration
    audio_text = text2text(text)
    audio_clip = text2audio(audio_text, duration)
    return result_generate(final_video_clip, audio_clip)
131
 
132
  # 定义 Gradio 接口
133
  # interface = gr.Interface(