Commit
·
cc8658a
1
Parent(s):
a372e74
temporalvideo usage changes (#7)
Browse files- temporalvideo usage changes (0feb70ddaca3da8481f8c3aa38e37c0d94b38d05)
Co-authored-by: Jason Gilholme <JasonGilholme@users.noreply.huggingface.co>
- temporalvideo.py +98 -35
temporalvideo.py
CHANGED
@@ -4,6 +4,7 @@ import requests
|
|
4 |
import json
|
5 |
import cv2
|
6 |
import numpy as np
|
|
|
7 |
import sys
|
8 |
import torch
|
9 |
from PIL import Image
|
@@ -20,6 +21,27 @@ import cv2
|
|
20 |
from torchvision.io import write_jpeg
|
21 |
import pickle
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
25 |
|
@@ -27,11 +49,9 @@ model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device
|
|
27 |
model = model.eval()
|
28 |
|
29 |
# Replace with the actual path to your image file and folder
|
30 |
-
x_path = "./init.png"
|
31 |
-
y_folder = "./Input_Images"
|
32 |
|
33 |
-
|
34 |
-
|
35 |
|
36 |
def get_image_paths(folder):
|
37 |
image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
|
@@ -40,7 +60,46 @@ def get_image_paths(folder):
|
|
40 |
files.extend(glob.glob(os.path.join(folder, ext)))
|
41 |
return sorted(files)
|
42 |
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
def send_request(last_image_path, optical_flow_path,current_image_path):
|
46 |
url = "http://localhost:7860/sdapi/v1/img2img"
|
@@ -51,7 +110,6 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
|
|
51 |
# Load and process the last image
|
52 |
last_image = cv2.imread(last_image_path)
|
53 |
last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
|
54 |
-
last_image = cv2.resize(last_image, (512, 512))
|
55 |
|
56 |
# Load and process the optical flow image
|
57 |
flow_image = cv2.imread(optical_flow_path)
|
@@ -79,31 +137,39 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
|
|
79 |
"inpainting_mask_invert": 1,
|
80 |
"resize_mode": 0,
|
81 |
"denoising_strength": 0.4,
|
82 |
-
"prompt":
|
83 |
-
"negative_prompt":
|
84 |
"alwayson_scripts": {
|
85 |
"ControlNet":{
|
86 |
"args": [
|
87 |
{
|
88 |
"input_image": current_image,
|
89 |
"module": "hed",
|
90 |
-
"model":
|
91 |
"weight": 0.7,
|
92 |
"guidance": 1,
|
|
|
|
|
93 |
},
|
94 |
{
|
95 |
"input_image": encoded_image,
|
96 |
-
"model":
|
97 |
"module": "none",
|
98 |
"weight": 0.6,
|
99 |
"guidance": 1,
|
|
|
|
|
|
|
|
|
100 |
},
|
101 |
{
|
102 |
"input_image": current_image,
|
103 |
-
"model":
|
104 |
"module": "openpose_full",
|
105 |
"weight": 0.7,
|
106 |
-
"guidance":1,
|
|
|
|
|
107 |
}
|
108 |
|
109 |
|
@@ -118,8 +184,8 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
|
|
118 |
"n_iter": 1,
|
119 |
"steps": 20,
|
120 |
"cfg_scale": 6,
|
121 |
-
"width":
|
122 |
-
"height":
|
123 |
"restore_faces": True,
|
124 |
"include_init_images": True,
|
125 |
"override_settings": {},
|
@@ -164,25 +230,18 @@ def infer(frameA, frameB):
|
|
164 |
img2_batch = F.resize(img2_batch, size=[512, 512])
|
165 |
return transforms(img1_batch, img2_batch)
|
166 |
|
167 |
-
|
168 |
img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
|
169 |
|
170 |
-
|
171 |
list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
|
172 |
|
173 |
-
|
174 |
-
|
175 |
|
176 |
-
|
|
|
177 |
|
178 |
-
|
179 |
|
180 |
-
predicted_flow = list_of_flows[-1][0]
|
181 |
-
opitcal_flow_path = os.path.join(output_folder, f"flow_{i}.png")
|
182 |
-
flow_img = flow_to_image(predicted_flow).to("cpu")
|
183 |
-
write_jpeg(flow_img,opitcal_flow_path)
|
184 |
-
|
185 |
-
|
186 |
return opitcal_flow_path
|
187 |
|
188 |
output_images = []
|
@@ -190,13 +249,13 @@ output_paths = []
|
|
190 |
|
191 |
# Initialize with the first image path
|
192 |
|
193 |
-
result =
|
194 |
-
output_image_path = os.path.join(
|
195 |
|
196 |
#with open(output_image_path, "wb") as f:
|
197 |
# f.write(result)
|
198 |
|
199 |
-
last_image_path =
|
200 |
for i in range(1, len(y_paths)):
|
201 |
# Use the last image path and optical flow map to generate the next input
|
202 |
optical_flow = infer(y_paths[i - 1], y_paths[i])
|
@@ -204,10 +263,14 @@ for i in range(1, len(y_paths)):
|
|
204 |
# Modify your send_request to use the last_image_path
|
205 |
result = send_request(last_image_path, optical_flow, y_paths[i])
|
206 |
data = json.loads(result)
|
207 |
-
encoded_image = data["images"][0]
|
208 |
-
output_image_path = os.path.join(output_folder, f"output_image_{i}.png")
|
209 |
-
last_image_path = output_image_path
|
210 |
-
with open(output_image_path, "wb") as f:
|
211 |
-
f.write(base64.b64decode(encoded_image))
|
212 |
-
print(f"Written data for frame {i}:")
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import json
|
5 |
import cv2
|
6 |
import numpy as np
|
7 |
+
import re
|
8 |
import sys
|
9 |
import torch
|
10 |
from PIL import Image
|
|
|
21 |
from torchvision.io import write_jpeg
|
22 |
import pickle
|
23 |
|
24 |
+
import argparse
|
25 |
+
|
26 |
+
|
27 |
+
def get_args():
    """Parse this script's command-line arguments.

    Positional:
        prompt: required text prompt.

    Options (with defaults):
        --negative-prompt  ("")                 stored as ``negative_prompt``
        --init-image       ("./init.png")       stored as ``init_image``
        --input-dir        ("./Input_Images")   stored as ``input_dir``
        --output-dir       ("./output")         stored as ``output_dir``
        --width            (512, int)
        --height           (512, int)

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()

    # The prompt is the only mandatory argument.
    cli.add_argument("prompt")

    # String-valued options as (flag, destination attribute, default).
    # Registration order is kept so the generated --help text is unchanged.
    string_options = (
        ("--negative-prompt", "negative_prompt", ""),
        ("--init-image", "init_image", "./init.png"),
        ("--input-dir", "input_dir", "./Input_Images"),
        ("--output-dir", "output_dir", "./output"),
    )
    for flag, destination, fallback in string_options:
        cli.add_argument(flag, dest=destination, default=fallback)

    # Integer-valued size options share the same default.
    for size_flag in ("--width", "--height"):
        cli.add_argument(size_flag, default=512, type=int)

    return cli.parse_args()
|
41 |
+
|
42 |
+
|
43 |
+
args = get_args()
|
44 |
+
|
45 |
|
46 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
47 |
|
|
|
49 |
model = model.eval()
|
50 |
|
51 |
# Image and folder paths are supplied via command-line arguments (see get_args)
|
|
|
|
|
52 |
|
53 |
+
os.makedirs(args.output_dir, exist_ok=True)
|
54 |
+
|
55 |
|
56 |
def get_image_paths(folder):
|
57 |
image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
|
|
|
60 |
files.extend(glob.glob(os.path.join(folder, ext)))
|
61 |
return sorted(files)
|
62 |
|
63 |
+
|
64 |
+
y_paths = get_image_paths(args.input_dir)
|
65 |
+
|
66 |
+
|
67 |
+
def get_controlnet_models():
    """Look up the ControlNet model names used by the img2img requests.

    Fetches the ControlNet model list from the local Stable Diffusion Web
    API and, for each of the TemporalNet v2, HED and OpenPose name patterns,
    keeps the first listed model name that matches.

    Returns:
        A ``(temporalnet_model, hed_model, openpose_model)`` tuple of model
        name strings exactly as reported by the API.

    Raises:
        Exception: If the model list endpoint does not answer with HTTP 200.
        AssertionError: If any of the three models is not in the list.
    """
    url = "http://localhost:7860/controlnet/model_list"

    # Raw strings so that ``\[`` / ``\]`` reach the regex engine as escapes
    # instead of being (invalid) string-literal escape sequences.
    # Each pattern expects the name to be followed by an 8-character tag in
    # square brackets (presumably the model hash -- confirm against the API).
    temporalnet_re = re.compile(r"^temporalnetversion2 \[.{8}\]")
    hed_re = re.compile(r"^control_.*hed.* \[.{8}\]")
    openpose_re = re.compile(r"^control_.*openpose.* \[.{8}\]")

    temporalnet_model = None
    hed_model = None
    openpose_model = None

    response = requests.get(url)
    if response.status_code != 200:
        raise Exception("Unable to list models from the SD Web API! "
                        "Is it running and is the controlnet extension installed?")
    models = json.loads(response.content)

    # ``model_name`` rather than ``model`` to avoid confusion with the
    # module-level ``model`` object.
    for model_name in models['model_list']:
        # First match wins for each kind; a name is assigned to one kind only.
        if temporalnet_model is None and temporalnet_re.match(model_name):
            temporalnet_model = model_name
        elif hed_model is None and hed_re.match(model_name):
            hed_model = model_name
        elif openpose_model is None and openpose_re.match(model_name):
            openpose_model = model_name

    # Raise explicitly instead of using ``assert`` so the checks still run
    # under ``python -O``; the exception type and messages are unchanged.
    if temporalnet_model is None:
        raise AssertionError("Unable to find the temporalnet2 model!  Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")
    if hed_model is None:
        raise AssertionError("Unable to find the hed_model model!  Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")
    if openpose_model is None:
        raise AssertionError("Unable to find the openpose model!  Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")

    return temporalnet_model, hed_model, openpose_model
|
99 |
+
|
100 |
+
|
101 |
+
TEMPORALNET_MODEL, HED_MODEL, OPENPOSE_MODEL = get_controlnet_models()
|
102 |
+
|
103 |
|
104 |
def send_request(last_image_path, optical_flow_path,current_image_path):
|
105 |
url = "http://localhost:7860/sdapi/v1/img2img"
|
|
|
110 |
# Load and process the last image
|
111 |
last_image = cv2.imread(last_image_path)
|
112 |
last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
|
|
|
113 |
|
114 |
# Load and process the optical flow image
|
115 |
flow_image = cv2.imread(optical_flow_path)
|
|
|
137 |
"inpainting_mask_invert": 1,
|
138 |
"resize_mode": 0,
|
139 |
"denoising_strength": 0.4,
|
140 |
+
"prompt": args.prompt,
|
141 |
+
"negative_prompt": args.negative_prompt,
|
142 |
"alwayson_scripts": {
|
143 |
"ControlNet":{
|
144 |
"args": [
|
145 |
{
|
146 |
"input_image": current_image,
|
147 |
"module": "hed",
|
148 |
+
"model": HED_MODEL,
|
149 |
"weight": 0.7,
|
150 |
"guidance": 1,
|
151 |
+
"pixel_perfect": True,
|
152 |
+
"resize_mode": 0,
|
153 |
},
|
154 |
{
|
155 |
"input_image": encoded_image,
|
156 |
+
"model": TEMPORALNET_MODEL,
|
157 |
"module": "none",
|
158 |
"weight": 0.6,
|
159 |
"guidance": 1,
|
160 |
+
# "processor_res": 512,
|
161 |
+
"threshold_a": 64,
|
162 |
+
"threshold_b": 64,
|
163 |
+
"resize_mode": 0,
|
164 |
},
|
165 |
{
|
166 |
"input_image": current_image,
|
167 |
+
"model": OPENPOSE_MODEL,
|
168 |
"module": "openpose_full",
|
169 |
"weight": 0.7,
|
170 |
+
"guidance": 1,
|
171 |
+
"pixel_perfect": True,
|
172 |
+
"resize_mode": 0,
|
173 |
}
|
174 |
|
175 |
|
|
|
184 |
"n_iter": 1,
|
185 |
"steps": 20,
|
186 |
"cfg_scale": 6,
|
187 |
+
"width": args.width,
|
188 |
+
"height": args.height,
|
189 |
"restore_faces": True,
|
190 |
"include_init_images": True,
|
191 |
"override_settings": {},
|
|
|
230 |
img2_batch = F.resize(img2_batch, size=[512, 512])
|
231 |
return transforms(img1_batch, img2_batch)
|
232 |
|
|
|
233 |
img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
|
234 |
|
|
|
235 |
list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
|
236 |
|
237 |
+
predicted_flow = list_of_flows[-1][0]
|
238 |
+
opitcal_flow_path = os.path.join(args.output_dir, f"flow_{i}.png")
|
239 |
|
240 |
+
flow_img = flow_to_image(predicted_flow).to("cpu")
|
241 |
+
flow_img = F.resize(flow_img, size=[args.height, args.width])
|
242 |
|
243 |
+
write_jpeg(flow_img, opitcal_flow_path)
|
244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
return opitcal_flow_path
|
246 |
|
247 |
output_images = []
|
|
|
249 |
|
250 |
# Initialize with the first image path
|
251 |
|
252 |
+
result = args.init_image
|
253 |
+
output_image_path = os.path.join(args.output_dir, f"output_image_0.png")
|
254 |
|
255 |
#with open(output_image_path, "wb") as f:
|
256 |
# f.write(result)
|
257 |
|
258 |
+
last_image_path = args.init_image
|
259 |
for i in range(1, len(y_paths)):
|
260 |
# Use the last image path and optical flow map to generate the next input
|
261 |
optical_flow = infer(y_paths[i - 1], y_paths[i])
|
|
|
263 |
# Modify your send_request to use the last_image_path
|
264 |
result = send_request(last_image_path, optical_flow, y_paths[i])
|
265 |
data = json.loads(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
+
for j, encoded_image in enumerate(data["images"]):
|
268 |
+
if j == 0:
|
269 |
+
output_image_path = os.path.join(args.output_dir, f"output_image_{i}.png")
|
270 |
+
last_image_path = output_image_path
|
271 |
+
else:
|
272 |
+
output_image_path = os.path.join(args.output_dir, f"controlnet_image_{j}_{i}.png")
|
273 |
+
|
274 |
+
with open(output_image_path, "wb") as f:
|
275 |
+
f.write(base64.b64decode(encoded_image))
|
276 |
+
print(f"Written data for frame {i}:")
|