@@ -815,8 +815,7 @@ class StableDiffusionGGML {
815815 const std::vector<float >& sigmas,
816816 int start_merge_step,
817817 SDCondition id_cond,
818- ggml_tensor* noise_mask = nullptr ) {
819- 818+ ggml_tensor* denoise_mask = NULL ) {
820819 std::vector<int > skip_layers (guidance.slg .layers , guidance.slg .layers + guidance.slg .layer_count );
821820
822821 // TODO (Pix2Pix): separate image guidance params (right now it's reusing distilled guidance)
@@ -1031,10 +1030,10 @@ class StableDiffusionGGML {
10311030 pretty_progress (step, (int )steps, (t1 - t0) / 1000000 .f );
10321031 // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000);
10331032 }
1034- if (noise_mask != nullptr ) {
1033+ if (denoise_mask != nullptr ) {
10351034 for (int64_t x = 0 ; x < denoised->ne [0 ]; x++) {
10361035 for (int64_t y = 0 ; y < denoised->ne [1 ]; y++) {
1037- float mask = ggml_tensor_get_f32 (noise_mask , x, y);
1036+ float mask = ggml_tensor_get_f32 (denoise_mask , x, y);
10381037 for (int64_t k = 0 ; k < denoised->ne [2 ]; k++) {
10391038 float init = ggml_tensor_get_f32 (init_latent, x, y, k);
10401039 float den = ggml_tensor_get_f32 (denoised, x, y, k);
@@ -1288,7 +1287,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12881287 float style_ratio,
12891288 bool normalize_input,
12901289 std::string input_id_images_path,
1291- ggml_tensor* concat_latent = NULL ) {
1290+ ggml_tensor* concat_latent = NULL ,
1291+ ggml_tensor* denoise_mask = NULL ) {
12921292 if (seed < 0 ) {
12931293 // Generally, when using the provided command line, the seed is always >0.
12941294 // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1475,7 +1475,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
14751475 int W = width / 8 ;
14761476 int H = height / 8 ;
14771477 LOG_INFO (" sampling using %s method" , sampling_methods_str[sample_method]);
1478- ggml_tensor* noise_mask = nullptr ;
14791478
14801479 struct ggml_tensor * control_latent = NULL ;
14811480 if (sd_version_is_control (sd_ctx->sd ->version ) && image_hint != NULL ){
@@ -1544,8 +1543,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15441543 concat_latent = empty_latent;
15451544 }
15461545 cond.c_concat = concat_latent;
1546+ cond.c_concat = concat_latent;
15471547 uncond.c_concat = empty_latent;
1548- noise_mask = NULL ;
1548+ denoise_mask = NULL ;
15491549 } else if (sd_version_is_edit (sd_ctx->sd ->version ) || sd_version_is_control (sd_ctx->sd ->version )) {
15501550 LOG_INFO (" HERE" );
15511551 auto empty_latent = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, init_latent->ne [0 ], init_latent->ne [1 ], init_latent->ne [2 ], init_latent->ne [3 ]);
@@ -1561,8 +1561,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15611561 LOG_INFO (" HERE" );
15621562
15631563 cond.c_concat = concat_latent;
1564- } else {
1565- noise_mask = concat_latent;
15661564 }
15671565
15681566 for (int b = 0 ; b < batch_count; b++) {
@@ -1599,7 +1597,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15991597 sigmas,
16001598 start_merge_step,
16011599 id_cond,
1602- noise_mask );
1600+ denoise_mask );
16031601
16041602 // struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
16051603 // print_ggml_tensor(x_0);
@@ -1811,6 +1809,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18111809 sd_image_to_tensor (init_image.data , init_img);
18121810
18131811 ggml_tensor* concat_latent;
1812+ ggml_tensor* denoise_mask = NULL ;
18141813
18151814 ggml_tensor* init_latent = NULL ;
18161815 ggml_tensor* init_moments = NULL ;
@@ -1950,7 +1949,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
19501949 style_ratio,
19511950 normalize_input,
19521951 input_id_images_path_c_str,
1953- concat_latent);
1952+ concat_latent,
1953+ denoise_mask);
19541954
19551955 size_t t2 = ggml_time_ms ();
19561956
0 commit comments