pOpsPaper committed
Commit d05f06b
1 Parent(s): 83b8300

Update pops.py

Files changed (1)
  1. pops.py +13 -5
pops.py CHANGED
@@ -15,15 +15,15 @@ prior_instruct_repo: str = 'models/instruct/learned_prior.pth'
 prior_scene_repo: str = 'models/scene/learned_prior.pth'
 prior_repo = "pOpsPaper/operators"
 
-gpu = torch.device('cuda')
-cpu = torch.device('cpu')
+# gpu = torch.device('cuda')
+# cpu = torch.device('cpu')
 
 class PopsPipelines:
     def __init__(self):
         weight_dtype = torch.float16
         self.weight_dtype = weight_dtype
-        device = 'cuda:0'
-        self.device = device
+        device = 'cpu' #torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = 'cuda' #device
         self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(kandinsky_prior_repo,
                                                                            subfolder='image_encoder',
                                                                            torch_dtype=weight_dtype).eval()
@@ -84,6 +84,7 @@ class PopsPipelines:
         return image
 
     def process_text(self, text):
+        self.text_encoder.to('cuda')
         text_inputs = self.tokenizer(
             text,
             padding="max_length",
@@ -96,12 +97,14 @@ class PopsPipelines:
         text_encoder_output = self.text_encoder(text_inputs.input_ids.to(self.device))
         text_encoder_hidden_states = text_encoder_output.last_hidden_state
         text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()]
+        self.text_encoder.to('cpu')
         return text_encoder_concat
 
     def run_binary(self, input_a, input_b, prior_type):
         # Move pipeline to GPU
         pipeline = self.priors_dict[prior_type]['pipeline']
         pipeline.to('cuda')
+        self.image_encoder.to('cuda')
         input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, input_b,
                                                                        self.image_encoder,
                                                                        pipeline.prior.clip_mean.detach(),
@@ -131,14 +134,17 @@ class PopsPipelines:
 
         # Move pipeline to CPU
         pipeline.to('cpu')
+        self.image_encoder.to('cpu')
         return img_emb
 
     def run_instruct(self, input_a, text):
+
         text_encodings = self.process_text(text)
 
         # Move pipeline to GPU
         instruct_pipeline = self.priors_dict['instruct']['pipeline']
         instruct_pipeline.to('cuda')
+        self.image_encoder.to('cuda')
         input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, None,
                                                                        self.image_encoder,
                                                                        instruct_pipeline.prior.clip_mean.detach(), instruct_pipeline.prior.clip_std.detach(),
@@ -155,13 +161,15 @@ class PopsPipelines:
 
         # Move pipeline to CPU
         instruct_pipeline.to('cpu')
+        self.image_encoder.to('cpu')
         return img_emb
 
     def render(self, img_emb):
+        self.decoder.to('cuda')
         images = self.decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=img_emb.negative_image_embeds,
                               num_inference_steps=50, height=512,
                               width=512, guidance_scale=4).images
-
+        self.decoder.to('cpu')
         return images[0]
 
     def run_instruct_texture(self, image_object_path, text_instruct, image_texture_path):
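The commit applies a manual offloading pattern: each heavy module (text encoder, image encoder, prior pipelines, decoder) is moved to 'cuda' immediately before it is used and back to 'cpu' right afterwards, so the models do not all occupy GPU memory at the same time. Below is a minimal sketch of the same pattern as a reusable helper; it is not part of the commit, and the helper name on_device and the CPU fallback are assumptions.

# Minimal sketch (assumption, not in the commit) of the offload pattern the
# diff applies by hand: hold a module on the GPU only for the duration of a
# call, then return it to the CPU so several large models can share VRAM.
from contextlib import contextmanager

import torch


@contextmanager
def on_device(module: torch.nn.Module, device: str = 'cuda'):
    # Fall back to CPU when no GPU is present, so the sketch stays runnable.
    target = device if torch.cuda.is_available() else 'cpu'
    module.to(target)
    try:
        yield module
    finally:
        # Mirror the commit's .to('cpu') calls after each use.
        module.to('cpu')


# Hypothetical usage, corresponding to process_text() in the diff:
# with on_device(self.text_encoder):
#     text_encoder_output = self.text_encoder(text_inputs.input_ids.to(self.device))

Wrapping each call site this way would also guarantee the module returns to the CPU if the forward pass raises, which the plain .to('cuda') / .to('cpu') pairs in the diff do not.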