OmPrakashSingh1704 committed on
Commit
c10fc76
·
1 Parent(s): 94a2d08
options/Banner_Model/__pycache__/Image2Image_2.cpython-310.pyc CHANGED
Binary files a/options/Banner_Model/__pycache__/Image2Image_2.cpython-310.pyc and b/options/Banner_Model/__pycache__/Image2Image_2.cpython-310.pyc differ
 
options/Video_model/__pycache__/Model.cpython-310.pyc CHANGED
Binary files a/options/Video_model/__pycache__/Model.cpython-310.pyc and b/options/Video_model/__pycache__/Model.cpython-310.pyc differ
 
options/Video_model/tdd_svd_scheduler.py ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List, Optional, Tuple, Union
3
+
4
+ import numpy as np
5
+ import torch
6
+
7
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
8
+ from diffusers.utils import BaseOutput, logging
9
+ from diffusers.utils.torch_utils import randn_tensor
10
+ from diffusers.schedulers.scheduling_utils import SchedulerMixin
11
+
12
+
13
+ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
14
+
15
+
16
@dataclass
class TDDSVDStochasticIterativeSchedulerOutput(BaseOutput):
    """
    Container returned by the scheduler's `step` method.

    Args:
        prev_sample (`torch.FloatTensor`):
            The sample computed for the previous timestep, `(x_{t-1})`. Feed it back to the model as the input of
            the next iteration of the denoising loop. For images the expected shape is
            `(batch_size, num_channels, height, width)`.
    """

    # Result of a single scheduler step; the only field carried by this output.
    prev_sample: torch.FloatTensor
27
+
28
+
29
class TDDSVDStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
    """
    Multistep and onestep stochastic sampling on a Karras sigma schedule, with an `eta`-controlled intermediate
    noise level used inside every step.

    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
    methods the library implements for all schedulers such as loading and saving.

    Args:
        num_train_timesteps (`int`, defaults to 40):
            The number of diffusion steps to train the model.
        sigma_min (`float`, defaults to 0.002):
            Minimum noise magnitude in the sigma schedule.
        sigma_max (`float`, defaults to 80.0):
            Maximum noise magnitude in the sigma schedule.
        sigma_data (`float`, defaults to 0.5):
            The standard deviation of the data distribution from the EDM
            [paper](https://huggingface.co/papers/2206.00364).
        s_noise (`float`, defaults to 1.0):
            Multiplier applied to the noise drawn in `step`.
        rho (`float`, defaults to 7.0):
            The parameter for calculating the Karras sigma schedule from the EDM
            [paper](https://huggingface.co/papers/2206.00364).
        clip_denoised (`bool`, defaults to `True`):
            Whether to clip the denoised outputs to `(-1, 1)`.
        eta (`float`, defaults to 0.3):
            Value in `[0, 1]` that places the intermediate sigma used in `step` between the sigma of the next
            inference step (`eta == 0`) and the last non-zero sigma of the training schedule (`eta == 1`).
    """

    order = 1

    @register_to_config
    def __init__(
        self,
        num_train_timesteps: int = 40,
        sigma_min: float = 0.002,
        sigma_max: float = 80.0,
        sigma_data: float = 0.5,
        s_noise: float = 1.0,
        rho: float = 7.0,
        clip_denoised: bool = True,
        eta: float = 0.3,
    ):
        # standard deviation of the initial noise distribution
        self.init_noise_sigma = (sigma_max**2 + 1) ** 0.5

        # Full training schedule: Karras sigmas followed by a terminal sigma of 0.
        ramp = np.linspace(0, 1, num_train_timesteps)
        sigmas = self._convert_to_karras(ramp)
        sigmas = np.concatenate([sigmas, np.array([0])])
        timesteps = self.sigma_to_t(sigmas)

        # setable values
        self.num_inference_steps = None
        # `Tensor.to` returns a new tensor, so the result has to be assigned.
        self.sigmas = torch.from_numpy(sigmas).to("cpu")  # to avoid too much CPU/GPU communication
        self.timesteps = torch.from_numpy(timesteps)
        self.custom_timesteps = False
        self.is_scale_input_called = False
        self._step_index = None
        # Training-schedule index of every entry of the current schedule. `step` reads it, so it must
        # exist even when `set_timesteps` has not been called yet.
        self.original_indices = np.arange(num_train_timesteps)

        self.set_eta(eta)
        # Snapshot of the full training schedule; `step` looks up the intermediate sigma in it.
        self.original_timesteps = self.timesteps.clone()
        self.original_sigmas = self.sigmas.clone()

    def index_for_timestep(self, timestep, schedule_timesteps=None):
        """
        Returns the position of `timestep` inside `schedule_timesteps` (defaults to `self.timesteps`).

        The timestep has to match exactly one entry of the schedule, otherwise the conversion of the match
        positions to a Python scalar fails.
        """
        if schedule_timesteps is None:
            schedule_timesteps = self.timesteps

        indices = (schedule_timesteps == timestep).nonzero()
        return indices.item()

    @property
    def step_index(self):
        """
        The index counter for current timestep. It will increase 1 after each scheduler step.
        """
        return self._step_index

    def scale_model_input(
        self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """
        Scales the model input by dividing it by `(sigma**2 + sigma_data**2) ** 0.5`.

        Args:
            sample (`torch.FloatTensor`):
                The input sample.
            timestep (`float` or `torch.FloatTensor`):
                The current timestep in the diffusion chain. Only used to initialise the step index when it has
                not been set yet.

        Returns:
            `torch.FloatTensor`:
                A scaled input sample.
        """
        # Get sigma corresponding to timestep
        if self.step_index is None:
            self._init_step_index(timestep)

        sigma = self.sigmas[self.step_index]
        sample = sample / ((sigma**2 + self.config.sigma_data**2) ** 0.5)

        self.is_scale_input_called = True
        return sample

    def sigma_to_t(self, sigmas: Union[float, np.ndarray]):
        """
        Gets scaled timesteps from the Karras sigmas, computed as `0.25 * log(sigma + 1e-44)`.

        Args:
            sigmas (`float` or `np.ndarray`):
                A single Karras sigma or an array of Karras sigmas.

        Returns:
            `float` or `np.ndarray`:
                A scaled input timestep or scaled input timestep array.
        """
        if not isinstance(sigmas, np.ndarray):
            sigmas = np.array(sigmas, dtype=np.float64)

        # The tiny offset keeps the logarithm finite for the terminal sigma of 0.
        timesteps = 0.25 * np.log(sigmas + 1e-44)

        return timesteps

    def set_timesteps(
        self,
        num_inference_steps: Optional[int] = None,
        device: Union[str, torch.device] = None,
        timesteps: Optional[List[int]] = None,
    ):
        """
        Sets the timesteps used for the diffusion chain (to be run before inference).

        Args:
            num_inference_steps (`int`):
                The number of diffusion steps used when generating samples with a pre-trained model.
            device (`str` or `torch.device`, *optional*):
                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
            timesteps (`List[int]`, *optional*):
                Custom indices into the training schedule, used to support arbitrary spacing between timesteps.
                They must be strictly descending and smaller than `num_train_timesteps`. If `None`, equally
                spaced indices are used. If `timesteps` is passed, `num_inference_steps` must be `None`.

        Raises:
            `ValueError`: If both or neither of `num_inference_steps` and `timesteps` are given, if `timesteps` is
                not strictly descending or starts at or beyond `num_train_timesteps`, or if `num_inference_steps`
                exceeds `num_train_timesteps`.
        """
        if num_inference_steps is None and timesteps is None:
            raise ValueError(
                "Exactly one of `num_inference_steps` or `timesteps` must be supplied."
            )

        if num_inference_steps is not None and timesteps is not None:
            raise ValueError(
                "Can only pass one of `num_inference_steps` or `timesteps`."
            )

        # Follow DDPMScheduler custom timesteps logic
        if timesteps is not None:
            for i in range(1, len(timesteps)):
                if timesteps[i] >= timesteps[i - 1]:
                    raise ValueError("`timesteps` must be in descending order.")

            if timesteps[0] >= self.config.num_train_timesteps:
                raise ValueError(
                    f"`timesteps` must start before `self.config.train_timesteps`:"
                    f" {self.config.num_train_timesteps}."
                )

            # Index 0 of the training schedule is `sigma_max`, and both `step` and the default branch
            # below walk the indices upwards. Reverse the validated descending list so that the
            # resulting sigmas decrease over the course of sampling instead of increasing.
            timesteps = np.array(timesteps, dtype=np.int64)[::-1].copy()
            self.custom_timesteps = True
        else:
            if num_inference_steps > self.config.num_train_timesteps:
                raise ValueError(
                    f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
                    f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
                    f" maximal {self.config.num_train_timesteps} timesteps."
                )

            self.num_inference_steps = num_inference_steps

            step_ratio = self.config.num_train_timesteps // self.num_inference_steps
            timesteps = (np.arange(0, num_inference_steps) * step_ratio).round().copy().astype(np.int64)
            self.custom_timesteps = False

        # Remember which training-schedule index every inference step maps to; `step` needs it.
        self.original_indices = timesteps
        # Map timesteps to Karras sigmas directly for multistep sampling
        # See https://github.com/openai/consistency_models/blob/main/cm/karras_diffusion.py#L675
        num_train_timesteps = self.config.num_train_timesteps
        ramp = timesteps.copy()
        ramp = ramp / (num_train_timesteps - 1)
        sigmas = self._convert_to_karras(ramp)
        timesteps = self.sigma_to_t(sigmas)

        sigmas = np.concatenate([sigmas, [0]]).astype(np.float32)
        self.sigmas = torch.from_numpy(sigmas).to(device=device)

        if str(device).startswith("mps"):
            # mps does not support float64
            self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32)
        else:
            self.timesteps = torch.from_numpy(timesteps).to(device=device)

        self._step_index = None
        # `Tensor.to` is not in-place: assign the result, otherwise the sigmas silently stay on `device`.
        self.sigmas = self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication

    # Modified _convert_to_karras implementation that takes in ramp as argument
    def _convert_to_karras(self, ramp):
        """
        Constructs the noise schedule of Karras et al. (2022).

        `ramp` values of 0 map to `sigma_max` and values of 1 map to `sigma_min`.
        """
        sigma_min = self.config.sigma_min
        sigma_max = self.config.sigma_max
        rho = self.config.rho

        min_inv_rho = sigma_min ** (1 / rho)
        max_inv_rho = sigma_max ** (1 / rho)
        sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
        return sigmas

    def get_scalings(self, sigma):
        """
        Returns the `(c_skip, c_out)` weights that combine the current sample and the model output into the
        denoised estimate for noise level `sigma`.
        """
        sigma_data = self.config.sigma_data

        c_skip = sigma_data**2 / (sigma**2 + sigma_data**2)
        c_out = -sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5
        return c_skip, c_out

    def get_scalings_for_boundary_condition(self, sigma):
        """
        Gets the scalings used by `step` to turn the model output into a denoised sample.

        <Tip>

        Unlike the consistency model parameterization (Appendix C of the
        [paper](https://huggingface.co/papers/2303.01469)), no `sigma_min` offset is applied here: the result is
        identical to [`~TDDSVDStochasticIterativeScheduler.get_scalings`].

        </Tip>

        Args:
            sigma (`torch.FloatTensor`):
                The current sigma in the Karras sigma schedule.

        Returns:
            `tuple`:
                A two-element tuple where `c_skip` (which weights the current sample) is the first element and `c_out`
                (which weights the model output) is the second element.
        """
        return self.get_scalings(sigma)

    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
    def _init_step_index(self, timestep):
        """Sets `self._step_index` to the position of `timestep` in `self.timesteps`."""
        if isinstance(timestep, torch.Tensor):
            timestep = timestep.to(self.timesteps.device)

        index_candidates = (self.timesteps == timestep).nonzero()

        # The sigma index that is taken for the **very** first `step`
        # is always the second index (or the last index if there is only 1)
        # This way we can ensure we don't accidentally skip a sigma in
        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
        if len(index_candidates) > 1:
            step_index = index_candidates[1]
        else:
            step_index = index_candidates[0]

        self._step_index = step_index.item()

    def step(
        self,
        model_output: torch.FloatTensor,
        timestep: Union[float, torch.FloatTensor],
        sample: torch.FloatTensor,
        generator: Optional[torch.Generator] = None,
        return_dict: bool = True,
    ) -> Union[TDDSVDStochasticIterativeSchedulerOutput, Tuple]:
        """
        Predict the sample for the next (less noisy) entry of the schedule.

        The sample is first moved deterministically from the current sigma to an intermediate sigma chosen by
        `eta`; noise scaled by the gap between the next sigma and that intermediate sigma is then added.

        Args:
            model_output (`torch.FloatTensor`):
                The direct output from the learned diffusion model.
            timestep (`float`):
                The current timestep in the diffusion chain. Must be one of `scheduler.timesteps`, not an integer
                index.
            sample (`torch.FloatTensor`):
                A current instance of a sample created by the diffusion process.
            generator (`torch.Generator`, *optional*):
                A random number generator.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`TDDSVDStochasticIterativeSchedulerOutput`] or `tuple`.

        Returns:
            [`TDDSVDStochasticIterativeSchedulerOutput`] or `tuple`:
                If return_dict is `True`, [`TDDSVDStochasticIterativeSchedulerOutput`] is returned, otherwise a
                tuple is returned where the first element is the sample tensor.

        Raises:
            `ValueError`: If `timestep` is an integer or an integer tensor.
        """

        if (
            isinstance(timestep, int)
            or isinstance(timestep, torch.IntTensor)
            or isinstance(timestep, torch.LongTensor)
        ):
            raise ValueError(
                (
                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
                    f" `{self.__class__}.step()` is not supported. Make sure to pass"
                    " one of the `scheduler.timesteps` as a timestep."
                ),
            )

        if not self.is_scale_input_called:
            logger.warning(
                "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
                "See `StableDiffusionPipeline` for a usage example."
            )

        sigma_max = self.config.sigma_max

        if self.step_index is None:
            self._init_step_index(timestep)

        # sigma_next corresponds to next_t in original implementation
        next_step_index = self.step_index + 1

        sigma = self.sigmas[self.step_index]
        if next_step_index < len(self.sigmas):
            sigma_next = self.sigmas[next_step_index]
        else:
            # Past the end of the schedule: fall back to its last entry
            sigma_next = self.sigmas[-1]

        # Get scalings for boundary conditions
        c_skip, c_out = self.get_scalings_for_boundary_condition(sigma)

        if next_step_index < len(self.original_indices):
            # Intermediate index: `eta` interpolates between the next step's training index and the
            # last non-zero sigma of the training schedule (index `num_train_timesteps - 1`).
            next_step_original_index = self.original_indices[next_step_index]
            step_s_original_index = int(
                next_step_original_index
                + self.eta * (self.config.num_train_timesteps - 1 - next_step_original_index)
            )
            sigma_s = self.original_sigmas[step_s_original_index]
        else:
            sigma_s = self.sigmas[-1]

        # 1. Denoise model output using boundary conditions
        denoised = c_out * model_output + c_skip * sample
        if self.config.clip_denoised:
            denoised = denoised.clamp(-1, 1)

        # Euler step from `sigma` to the intermediate `sigma_s`
        d = (sample - denoised) / sigma
        sample_s = sample + d * (sigma_s - sigma)

        # 2. Sample z ~ N(0, s_noise^2 * I)
        # Noise is not used for onestep sampling.
        if len(self.timesteps) > 1:
            noise = randn_tensor(
                model_output.shape,
                dtype=model_output.dtype,
                device=model_output.device,
                generator=generator,
            )
        else:
            noise = torch.zeros_like(model_output)
        z = noise * self.config.s_noise

        sigma_hat = sigma_next.clamp(min=0, max=sigma_max)

        # The original consistency-model update is `denoised + z * sigma_hat`; here noise is only
        # added for the gap between the intermediate sigma and the next sigma.
        prev_sample = sample_s + z * (sigma_hat - sigma_s)

        # upon completion increase step index by one
        self._step_index += 1

        if not return_dict:
            return (prev_sample,)

        return TDDSVDStochasticIterativeSchedulerOutput(prev_sample=prev_sample)

    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise
    def add_noise(
        self,
        original_samples: torch.FloatTensor,
        noise: torch.FloatTensor,
        timesteps: torch.FloatTensor,
    ) -> torch.FloatTensor:
        """
        Returns `original_samples + noise * sigma`, where `sigma` is looked up in the current schedule for each
        entry of `timesteps`.

        Args:
            original_samples (`torch.FloatTensor`):
                The clean samples.
            noise (`torch.FloatTensor`):
                The noise to add.
            timesteps (`torch.FloatTensor`):
                One timestep per sample; each must match exactly one entry of `self.timesteps`.

        Returns:
            `torch.FloatTensor`:
                The noisy samples.
        """
        # Make sure sigmas and timesteps have the same device and dtype as original_samples
        sigmas = self.sigmas.to(
            device=original_samples.device, dtype=original_samples.dtype
        )
        if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
            # mps does not support float64
            schedule_timesteps = self.timesteps.to(
                original_samples.device, dtype=torch.float32
            )
            timesteps = timesteps.to(original_samples.device, dtype=torch.float32)
        else:
            schedule_timesteps = self.timesteps.to(original_samples.device)
            timesteps = timesteps.to(original_samples.device)

        step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]

        # Append trailing singleton dimensions so sigma broadcasts against the samples.
        sigma = sigmas[step_indices].flatten()
        while len(sigma.shape) < len(original_samples.shape):
            sigma = sigma.unsqueeze(-1)

        noisy_samples = original_samples + noise * sigma
        return noisy_samples

    def __len__(self):
        return self.config.num_train_timesteps

    def set_eta(self, eta: float):
        """
        Sets the `eta` used by `step` to pick the intermediate sigma.

        Args:
            eta (`float`):
                Value in the closed range `[0, 1]`.

        Raises:
            `ValueError`: If `eta` lies outside `[0, 1]`.
        """
        # Explicit check instead of `assert`, which is skipped when Python runs with `-O`.
        if not 0.0 <= eta <= 1.0:
            raise ValueError(f"`eta` must be in the range [0.0, 1.0], but got {eta}.")
        self.eta = eta