georgefen committed on
Commit
37210be
•
1 Parent(s): d92dcba

support cpu only...

app.py CHANGED
@@ -17,8 +17,8 @@ import dlib
 from PIL import Image, ImageDraw
 
 model = create_model('./models/cldm_v15.yaml').cpu()
-model.load_state_dict(load_state_dict('./models/control_sd15_landmarks.pth', location='cuda'))
-model = model.cuda()
+model.load_state_dict(load_state_dict('./models/control_sd15_landmarks.pth', location='cpu'))
+model = model
 ddim_sampler = DDIMSampler(model)
 
 detector = dlib.get_frontal_face_detector()
@@ -56,7 +56,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
     detected_map = get_68landmarks_img(img)
     detected_map = HWC3(detected_map)
 
-    control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+    control = torch.from_numpy(detected_map.copy()).float() / 255.0
     control = torch.stack([control for _ in range(num_samples)], dim=0)
     control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 
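The commit hard-codes CPU placement in app.py. For comparison, a minimal device-agnostic sketch (not part of this commit, assuming the repo's existing create_model, load_state_dict and DDIMSampler helpers) would pick the device at runtime and fall back to CPU when no GPU is present:

import torch
from cldm.model import create_model, load_state_dict
from cldm.ddim_hacked import DDIMSampler

# Choose the device once instead of hard-coding 'cpu' or 'cuda'.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_landmarks.pth', location=device))
model = model.to(device)
ddim_sampler = DDIMSampler(model)

# Later, inside process(), the control tensor would follow the same device:
# control = torch.from_numpy(detected_map.copy()).float().to(device) / 255.0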
cldm/cldm.py CHANGED
@@ -424,12 +424,12 @@ class ControlLDM(LatentDiffusion):
 
     def low_vram_shift(self, is_diffusing):
         if is_diffusing:
-            self.model = self.model.cuda()
-            self.control_model = self.control_model.cuda()
+            self.model = self.model.cpu()
+            self.control_model = self.control_model.cpu()
             self.first_stage_model = self.first_stage_model.cpu()
             self.cond_stage_model = self.cond_stage_model.cpu()
         else:
             self.model = self.model.cpu()
             self.control_model = self.control_model.cpu()
-            self.first_stage_model = self.first_stage_model.cuda()
-            self.cond_stage_model = self.cond_stage_model.cuda()
+            self.first_stage_model = self.first_stage_model.cpu()
+            self.cond_stage_model = self.cond_stage_model.cpu()
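After this change both branches of low_vram_shift keep every sub-model on the CPU, so the low-VRAM swapping effectively becomes a no-op, which is consistent with the CPU-only goal. A sketch of an alternative (not part of the commit, using a hypothetical module-level DEVICE constant) would preserve the original swap behaviour when a GPU is available:

import torch

# Hypothetical setting; torch.device("cpu") reproduces what this commit does.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def low_vram_shift(self, is_diffusing):
    # While diffusing, keep the UNet and control net on DEVICE and park the rest on CPU.
    if is_diffusing:
        self.model = self.model.to(DEVICE)
        self.control_model = self.control_model.to(DEVICE)
        self.first_stage_model = self.first_stage_model.cpu()
        self.cond_stage_model = self.cond_stage_model.cpu()
    else:
        self.model = self.model.cpu()
        self.control_model = self.control_model.cpu()
        self.first_stage_model = self.first_stage_model.to(DEVICE)
        self.cond_stage_model = self.cond_stage_model.to(DEVICE)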
cldm/ddim_hacked.py CHANGED
@@ -16,8 +16,8 @@ class DDIMSampler(object):
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device("cpu"):
+                attr = attr.to(torch.device("cpu"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
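The same register_buffer edit is repeated below in ldm/models/diffusion/ddim.py, dpm_solver/sampler.py and plms.py. A device-agnostic sketch (an assumption, relying on the sampler's existing self.model attribute) would follow the wrapped model's device instead of naming one:

def register_buffer(self, name, attr):
    # Move tensor buffers to wherever the wrapped model already lives,
    # rather than hard-coding "cuda" or "cpu".
    if type(attr) == torch.Tensor:
        target = next(self.model.parameters()).device
        if attr.device != target:
            attr = attr.to(target)
    setattr(self, name, attr)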
ldm/models/diffusion/ddim.py CHANGED
@@ -16,8 +16,8 @@ class DDIMSampler(object):
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device("cpu"):
+                attr = attr.to(torch.device("cpu"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/models/diffusion/dpm_solver/sampler.py CHANGED
@@ -19,8 +19,8 @@ class DPMSolverSampler(object):
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device("cpu"):
+                attr = attr.to(torch.device("cpu"))
         setattr(self, name, attr)
 
     @torch.no_grad()
ldm/models/diffusion/plms.py CHANGED
@@ -18,8 +18,8 @@ class PLMSSampler(object):
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device("cpu"):
+                attr = attr.to(torch.device("cpu"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/modules/attention.py CHANGED
@@ -172,7 +172,7 @@ class CrossAttention(nn.Module):
 
         # force cast to fp32 to avoid overflowing
         if _ATTN_PRECISION =="fp32":
-            with torch.autocast(enabled=False, device_type = 'cuda'):
+            with torch.autocast(enabled=False, device_type = 'cpu'):
                 q, k = q.float(), k.float()
                 sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
         else:
ldm/modules/diffusionmodules/util.py CHANGED
@@ -133,7 +133,7 @@ class CheckpointFunction(torch.autograd.Function):
     def backward(ctx, *output_grads):
         ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
         with torch.enable_grad(), \
-                torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
+                torch.cpu.amp.autocast(**ctx.gpu_autocast_kwargs):
             # Fixes a bug where the first op in run_function modifies the
             # Tensor storage in place, which is not allowed for detach()'d
             # Tensors.
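torch.cpu.amp.autocast is CPU-specific in the same way torch.cuda.amp.autocast is CUDA-specific. A neutral sketch (not part of the commit, assuming the generic torch.amp.autocast API and the kwargs already stored on ctx) would pass the device type as a string:

# Sketch only: select the autocast backend once, reuse ctx's stored kwargs.
device_type = "cuda" if torch.cuda.is_available() else "cpu"
with torch.enable_grad(), \
        torch.amp.autocast(device_type, **ctx.gpu_autocast_kwargs):
    ...  # original backward body unchanged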
ldm/modules/encoders/modules.py CHANGED
@@ -42,7 +42,7 @@ class ClassEmbedder(nn.Module):
         c = self.embedding(c)
         return c
 
-    def get_unconditional_conditioning(self, bs, device="cuda"):
+    def get_unconditional_conditioning(self, bs, device="cpu"):
         uc_class = self.n_classes - 1 # 1000 classes --> 0 ... 999, one extra class for ucg (class 1000)
         uc = torch.ones((bs,), device=device) * uc_class
         uc = {self.key: uc}
@@ -57,7 +57,7 @@ def disabled_train(self, mode=True):
 
 class FrozenT5Embedder(AbstractEncoder):
     """Uses the T5 transformer encoder for text"""
-    def __init__(self, version="google/t5-v1_1-large", device="cuda", max_length=77, freeze=True): # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
+    def __init__(self, version="google/t5-v1_1-large", device="cpu", max_length=77, freeze=True): # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
         super().__init__()
         self.tokenizer = T5Tokenizer.from_pretrained(version)
         self.transformer = T5EncoderModel.from_pretrained(version)
@@ -92,7 +92,7 @@ class FrozenCLIPEmbedder(AbstractEncoder):
         "pooled",
         "hidden"
     ]
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77,
+    def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77,
                  freeze=True, layer="last", layer_idx=None): # clip-vit-base-patch32
         super().__init__()
         assert layer in self.LAYERS
@@ -140,7 +140,7 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
         "last",
         "penultimate"
     ]
-    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cuda", max_length=77,
+    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cpu", max_length=77,
                  freeze=True, layer="last"):
         super().__init__()
         assert layer in self.LAYERS
@@ -194,7 +194,7 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
 
 
 class FrozenCLIPT5Encoder(AbstractEncoder):
-    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device="cuda",
+    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device="cpu",
                  clip_max_length=77, t5_max_length=77):
         super().__init__()
         self.clip_encoder = FrozenCLIPEmbedder(clip_version, device, max_length=clip_max_length)
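These __init__ defaults only matter when no device argument is supplied; callers can still choose a device explicitly. A minimal usage sketch (not part of the commit, assuming the FrozenCLIPEmbedder defined above and access to the Hugging Face weights):

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
encoder = FrozenCLIPEmbedder(device=device).to(device)    # explicit device overrides the default
cond = encoder.encode(["a portrait photo of a person"])   # conditioning tensor of shape (batch, 77, 768)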