Commit 9f8054e

Nathan Lambert, daspartho, santiviquez, and patrickvonplaten authored
[docs sprint] schedulers docs, will update (huggingface#376)
* init schedulers docs
* add some docstrings, fix sidebar formatting
* add docstrings
* [Type hint] PNDM schedulers (huggingface#335)
  * [Type hint] PNDM Schedulers
  * ran make style
  * updated timesteps type hint
  * apply suggestions from code review
  * ran make style
  * removed unused import
* [Type hint] scheduling ddim (huggingface#343)
  * [Type hint] scheduling ddim
  * apply suggestions from code review, also returning the return type
  * make style
* update class docstrings
* add docstrings
* missed merge edit
* add general docs page
* modify headings for right sidebar

Co-authored-by: Partho <parthodas6176@gmail.com>
Co-authored-by: Santiago Víquez <santi.viquez@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
1 parent 83233e0 commit 9f8054e

8 files changed: +384 additions, -53 deletions

schedulers/scheduling_ddim.py

Lines changed: 61 additions & 6 deletions
@@ -30,11 +30,17 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
 
-    :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t
-                                    from 0 to 1 and
-                                    produces the cumulative product of (1-beta) up to that part of the diffusion process.
-    :param max_beta: the maximum beta to use; use values lower than 1 to
+    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+    to that part of the diffusion process.
+
+
+    Args:
+        num_diffusion_timesteps (`int`): the number of betas to produce.
+        max_beta (`float`): the maximum beta to use; use values lower than 1 to
                      prevent singularities.
+
+    Returns:
+        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
 
     def alpha_bar(time_step):
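
For reference, here is a self-contained sketch of what betas_for_alpha_bar computes under the Args/Returns added above; the alpha_bar body shown mirrors the cosine (squaredcos_cap_v2) schedule used by these schedulers, and the final assert is illustrative only.

import math

import numpy as np


def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
    def alpha_bar(time_step):
        # cosine schedule: cumulative product of (1-beta) as a function of t in [0, 1]
        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2

    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        # beta_i = 1 - alpha_bar(t2) / alpha_bar(t1), capped at max_beta
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return np.array(betas, dtype=np.float32)


betas = betas_for_alpha_bar(1000)
assert betas.shape == (1000,) and float(betas.max()) <= 0.999  # max_beta avoids singularities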
@@ -49,6 +55,29 @@ def alpha_bar(time_step):
 
 
 class DDIMScheduler(SchedulerMixin, ConfigMixin):
+    """
+    Denoising diffusion implicit models is a scheduler that extends the denoising procedure introduced in denoising
+    diffusion probabilistic models (DDPMs) with non-Markovian guidance.
+
+    For more details, see the original paper: https://arxiv.org/abs/2010.02502
+
+    Args:
+        num_train_timesteps (`int`): number of diffusion steps used to train the model.
+        beta_start (`float`): the starting `beta` value of inference.
+        beta_end (`float`): the final `beta` value.
+        beta_schedule (`str`):
+            the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+            `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
+        trained_betas (`np.ndarray`, optional): TODO
+        timestep_values (`np.ndarray`, optional): TODO
+        clip_sample (`bool`, default `True`):
+            option to clip predicted sample between -1 and 1 for numerical stability.
+        set_alpha_to_one (`bool`, default `True`):
+            if alpha for final step is 1 or the final alpha of the "non-previous" one.
+        tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays.
+
+    """
+
     @register_to_config
     def __init__(
         self,
@@ -62,7 +91,8 @@ def __init__(
         set_alpha_to_one: bool = True,
         tensor_format: str = "pt",
     ):
-
+        if trained_betas is not None:
+            self.betas = np.asarray(trained_betas)
         if beta_schedule == "linear":
             self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32)
         elif beta_schedule == "scaled_linear":
@@ -101,6 +131,14 @@ def _get_variance(self, timestep, prev_timestep):
         return variance
 
     def set_timesteps(self, num_inference_steps: int, offset: int = 0):
+        """
+        Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+        Args:
+            num_inference_steps (`int`):
+                the number of diffusion steps used when generating samples with a pre-trained model.
+            offset (`int`): TODO
+        """
         self.num_inference_steps = num_inference_steps
         self.timesteps = np.arange(
             0, self.config.num_train_timesteps, self.config.num_train_timesteps // self.num_inference_steps
@@ -118,7 +156,24 @@ def step(
         generator=None,
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
-
+        """
+        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model.
+            timestep (`int`): current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor` or `np.ndarray`):
+                current instance of sample being created by diffusion process.
+            eta (`float`): weight of noise for added noise in diffusion step.
+            use_clipped_model_output (`bool`): TODO
+            generator: random number generator.
+            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+
+        Returns:
+            `SchedulerOutput`: updated sample in the diffusion chain.
+
+        """
         if self.num_inference_steps is None:
             raise ValueError(
                 "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"

schedulers/scheduling_ddpm.py

Lines changed: 59 additions & 5 deletions
@@ -29,11 +29,17 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
 
-    :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t
-                                    from 0 to 1 and
-                                    produces the cumulative product of (1-beta) up to that part of the diffusion process.
-    :param max_beta: the maximum beta to use; use values lower than 1 to
+    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+    to that part of the diffusion process.
+
+
+    Args:
+        num_diffusion_timesteps (`int`): the number of betas to produce.
+        max_beta (`float`): the maximum beta to use; use values lower than 1 to
                      prevent singularities.
+
+    Returns:
+        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
 
     def alpha_bar(time_step):
@@ -48,6 +54,29 @@ def alpha_bar(time_step):
 
 
 class DDPMScheduler(SchedulerMixin, ConfigMixin):
+    """
+    Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
+    Langevin dynamics sampling.
+
+    For more details, see the original paper: https://arxiv.org/abs/2006.11239
+
+    Args:
+        num_train_timesteps (`int`): number of diffusion steps used to train the model.
+        beta_start (`float`): the starting `beta` value of inference.
+        beta_end (`float`): the final `beta` value.
+        beta_schedule (`str`):
+            the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+            `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
+        trained_betas (`np.ndarray`, optional): TODO
+        variance_type (`str`):
+            options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`,
+            `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
+        clip_sample (`bool`, default `True`):
+            option to clip predicted sample between -1 and 1 for numerical stability.
+        tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays.
+
+    """
+
     @register_to_config
     def __init__(
         self,
@@ -88,6 +117,13 @@ def __init__(
         self.variance_type = variance_type
 
     def set_timesteps(self, num_inference_steps: int):
+        """
+        Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+        Args:
+            num_inference_steps (`int`):
+                the number of diffusion steps used when generating samples with a pre-trained model.
+        """
         num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps)
         self.num_inference_steps = num_inference_steps
         self.timesteps = np.arange(
@@ -137,7 +173,25 @@ def step(
         generator=None,
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
-
+        """
+        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model.
+            timestep (`int`): current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor` or `np.ndarray`):
+                current instance of sample being created by diffusion process.
+            eta (`float`): weight of noise for added noise in diffusion step.
+            predict_epsilon (`bool`):
+                optional flag to use when model predicts the samples directly instead of the noise, epsilon.
+            generator: random number generator.
+            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+
+        Returns:
+            `SchedulerOutput`: updated sample in the diffusion chain.
+
+        """
         t = timestep
 
         if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
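
The analogous DDPM loop draws fresh noise at every step, which is what the generator argument documented above controls; a minimal sketch under the same stand-in-model assumption:

import torch

from diffusers import DDPMScheduler

scheduler = DDPMScheduler(num_train_timesteps=1000, variance_type="fixed_small", tensor_format="pt")
scheduler.set_timesteps(num_inference_steps=1000)

generator = torch.manual_seed(0)
sample = torch.randn(1, 3, 32, 32, generator=generator)
for t in scheduler.timesteps:
    model_output = torch.zeros_like(sample)  # stand-in for a trained noise-prediction model
    # the generator seeds the per-step variance noise selected by variance_type
    sample = scheduler.step(model_output, t, sample, generator=generator).prev_sample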

schedulers/scheduling_karras_ve.py

Lines changed: 58 additions & 17 deletions
@@ -49,6 +49,24 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
     [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models."
     https://arxiv.org/abs/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic
     differential equations." https://arxiv.org/abs/2011.13456
+
+    For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of
+    Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. The grid search values used to find the
+    optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper.
+
+    Args:
+        sigma_min (`float`): minimum noise magnitude
+        sigma_max (`float`): maximum noise magnitude
+        s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling.
+            A reasonable range is [1.000, 1.011].
+        s_churn (`float`): the parameter controlling the overall amount of stochasticity.
+            A reasonable range is [0, 100].
+        s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity).
+            A reasonable range is [0, 10].
+        s_max (`float`): the end value of the sigma range where we add noise.
+            A reasonable range is [0.2, 80].
+        tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays.
+
     """
 
     @register_to_config
@@ -62,23 +80,6 @@ def __init__(
         s_max: float = 50,
         tensor_format: str = "pt",
     ):
-        """
-        For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of
-        Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. The grid search values used to find the
-        optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper.
-
-        Args:
-            sigma_min (`float`): minimum noise magnitude
-            sigma_max (`float`): maximum noise magnitude
-            s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling.
-                A reasonable range is [1.000, 1.011].
-            s_churn (`float`): the parameter controlling the overall amount of stochasticity.
-                A reasonable range is [0, 100].
-            s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity).
-                A reasonable range is [0, 10].
-            s_max (`float`): the end value of the sigma range where we add noise.
-                A reasonable range is [0.2, 80].
-        """
         # setable values
         self.num_inference_steps = None
         self.timesteps = None
@@ -88,6 +89,14 @@ def __init__(
         self.set_format(tensor_format=tensor_format)
 
     def set_timesteps(self, num_inference_steps: int):
+        """
+        Sets the continuous timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+        Args:
+            num_inference_steps (`int`):
+                the number of diffusion steps used when generating samples with a pre-trained model.
+
+        """
         self.num_inference_steps = num_inference_steps
         self.timesteps = np.arange(0, self.num_inference_steps)[::-1].copy()
         self.schedule = [
@@ -104,6 +113,8 @@ def add_noise_to_input(
         """
         Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a
         higher noise level sigma_hat = sigma_i + gamma_i*sigma_i.
+
+        TODO Args:
         """
         if self.s_min <= sigma <= self.s_max:
             gamma = min(self.s_churn / self.num_inference_steps, 2**0.5 - 1)
@@ -125,6 +136,21 @@ def step(
         sample_hat: Union[torch.FloatTensor, np.ndarray],
         return_dict: bool = True,
     ) -> Union[KarrasVeOutput, Tuple]:
+        """
+        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model.
+            sigma_hat (`float`): TODO
+            sigma_prev (`float`): TODO
+            sample_hat (`torch.FloatTensor` or `np.ndarray`): TODO
+            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+
+        Returns:
+            KarrasVeOutput: updated sample in the diffusion chain and derivative (TODO double check).
+
+        """
 
         pred_original_sample = sample_hat + sigma_hat * model_output
         derivative = (sample_hat - pred_original_sample) / sigma_hat
@@ -145,7 +171,22 @@ def step_correct(
         derivative: Union[torch.FloatTensor, np.ndarray],
         return_dict: bool = True,
     ) -> Union[KarrasVeOutput, Tuple]:
+        """
+        Correct the predicted sample based on the output model_output of the network. TODO complete description
+
+        Args:
+            model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model.
+            sigma_hat (`float`): TODO
+            sigma_prev (`float`): TODO
+            sample_hat (`torch.FloatTensor` or `np.ndarray`): TODO
+            sample_prev (`torch.FloatTensor` or `np.ndarray`): TODO
+            derivative (`torch.FloatTensor` or `np.ndarray`): TODO
+            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+
+        Returns:
+            prev_sample (TODO): updated sample in the diffusion chain. derivative (TODO): TODO
 
+        """
         pred_original_sample = sample_prev + sigma_prev * model_output
         derivative_corr = (sample_prev - pred_original_sample) / sigma_prev
         sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr)
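
step() and step_correct() form a predictor-corrector pair around add_noise_to_input(); a loop modeled loosely on the KarrasVe pipeline is sketched below, where the denoise stand-in, the schedule indexing, and the config attribute access are assumptions for illustration, not part of this commit.

import torch

from diffusers import KarrasVeScheduler

scheduler = KarrasVeScheduler(tensor_format="pt")
scheduler.set_timesteps(num_inference_steps=50)


def denoise(x, sigma):
    return torch.zeros_like(x)  # stand-in for a trained model


generator = torch.manual_seed(0)
# start from noise scaled to the maximum noise magnitude (assumed config access)
sample = torch.randn(1, 3, 32, 32) * scheduler.config.sigma_max
for t in scheduler.timesteps:
    sigma = scheduler.schedule[t]
    sigma_prev = scheduler.schedule[t - 1] if t > 0 else 0

    # "churn": add noise to reach the higher level sigma_hat = sigma + gamma * sigma
    sample_hat, sigma_hat = scheduler.add_noise_to_input(sample, sigma, generator=generator)

    # predictor step at the increased noise level
    output = scheduler.step(denoise(sample_hat, sigma_hat), sigma_hat, sigma_prev, sample_hat)

    if sigma_prev != 0:
        # corrector step re-evaluates the model at the predicted sample
        output = scheduler.step_correct(
            denoise(output.prev_sample, sigma_prev),
            sigma_hat,
            sigma_prev,
            sample_hat,
            output.prev_sample,
            output.derivative,
        )
    sample = output.prev_sample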
