Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7aa4d93

Browse files
committed
fix: adjust timestep calculations for DDIM and TCD
On img2img, the number of steps corresponds to the last precalculated sigma values, but the internal alphas_cumprod and compvis_sigmas were being computed over the entire step range. Also, tweak the prev_timestep calculation on DDIM to better match the current timestep (like on TCD), to avoid inconsistencies due to rounding.
1 parent 10c6501 commit 7aa4d93

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

‎denoiser.hpp‎

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ static void sample_k_diffusion(sample_method_t method,
474474
ggml_context* work_ctx,
475475
ggml_tensor* x,
476476
std::vector<float> sigmas,
477+
int initial_step,
477478
std::shared_ptr<RNG> rng,
478479
float eta) {
479480
size_t steps = sigmas.size() - 1;
@@ -1060,10 +1061,14 @@ static void sample_k_diffusion(sample_method_t method,
10601061
// x_t"
10611062
// - pred_prev_sample -> "x_t-1"
10621063
int timestep =
1063-
roundf(TIMESTEPS -
1064-
i * ((float)TIMESTEPS / steps)) - 1;
1064+
TIMESTEPS - 1 -
1065+
(int)roundf((initial_step + i) *
1066+
(TIMESTEPS / float(initial_step + steps)));
10651067
// 1. get previous step value (=t-1)
1066-
int prev_timestep = timestep - TIMESTEPS / steps;
1068+
int prev_timestep =
1069+
TIMESTEPS - 1 -
1070+
(int)roundf((initial_step + i + 1) *
1071+
(TIMESTEPS / float(initial_step + steps)));
10671072
// The sigma here is chosen to cause the
10681073
// CompVisDenoiser to produce t = timestep
10691074
float sigma = compvis_sigmas[timestep];
@@ -1236,12 +1241,13 @@ static void sample_k_diffusion(sample_method_t method,
12361241
// Analytic form for TCD timesteps
12371242
int timestep = TIMESTEPS - 1 -
12381243
(TIMESTEPS / original_steps) *
1239-
(int)floor(i * ((float)original_steps / steps));
1244+
(int)floor((initial_step + i) *
1245+
((float)original_steps / (initial_step + steps)));
12401246
// 1. get previous step value
12411247
int prev_timestep = i >= steps - 1 ? 0 :
12421248
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1243-
(int)floor((i + 1) *
1244-
((float)original_steps / steps));
1249+
(int)floor((initial_step + i + 1) *
1250+
((float)original_steps / (initial_step + steps)));
12451251
// Here timestep_s is tau_n' in Algorithm 4. The _s
12461252
// notation appears to be that from C. Lu,
12471253
// "DPM-Solver: A Fast ODE Solver for Diffusion

‎stable-diffusion.cpp‎

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ class StableDiffusionGGML {
798798
float eta,
799799
sample_method_t method,
800800
const std::vector<float>& sigmas,
801+
int initial_step,
801802
int start_merge_step,
802803
SDCondition id_cond,
803804
std::vector<int> skip_layers = {},
@@ -991,7 +992,7 @@ class StableDiffusionGGML {
991992
return denoised;
992993
};
993994

994-
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng, eta);
995+
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, initial_step, rng, eta);
995996

996997
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
997998

@@ -1202,6 +1203,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12021203
int height,
12031204
enum sample_method_t sample_method,
12041205
const std::vector<float>& sigmas,
1206+
int initial_step,
12051207
int64_t seed,
12061208
int batch_count,
12071209
const sd_image_t* control_cond,
@@ -1464,6 +1466,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
14641466
eta,
14651467
sample_method,
14661468
sigmas,
1469+
initial_step,
14671470
start_merge_step,
14681471
id_cond,
14691472
skip_layers,
@@ -1611,6 +1614,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16111614
height,
16121615
sample_method,
16131616
sigmas,
1617+
0,
16141618
seed,
16151619
batch_count,
16161620
control_cond,
@@ -1775,8 +1779,9 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17751779
if (t_enc == sample_steps)
17761780
t_enc--;
17771781
LOG_INFO("target t_enc is %zu steps", t_enc);
1782+
int initial_step = sample_steps - t_enc - 1;
17781783
std::vector<float> sigma_sched;
1779-
sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end());
1784+
sigma_sched.assign(sigmas.begin() + initial_step, sigmas.end());
17801785

17811786
sd_image_t* result_images = generate_image(sd_ctx,
17821787
work_ctx,
@@ -1791,6 +1796,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17911796
height,
17921797
sample_method,
17931798
sigma_sched,
1799+
initial_step,
17941800
seed,
17951801
batch_count,
17961802
control_cond,
@@ -1903,6 +1909,7 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
19031909
0.f,
19041910
sample_method,
19051911
sigmas,
1912+
0,
19061913
-1,
19071914
SDCondition(NULL, NULL, NULL));
19081915

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /