Commit d1d7420

committed

support for flux controls

1 parent fb604b7 commit d1d7420 (Copy full SHA for d1d7420)

File tree

4 files changed

+55

-32

lines changed

flux.hpp
model.cpp
model.h
stable-diffusion.cpp

4 files changed

+55

-32

lines changed

`‎flux.hpp‎`

Lines changed: 10 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1032,6 +1032,14 @@ namespace Flux {`
`1032`	`1032`	`control = patchify(ctx, control, patch_size);`
`1033`	`1033`
`1034`	`1034`	`img = ggml_concat(ctx, img, ggml_concat(ctx, ggml_concat(ctx, masked, mask, 0), control, 0), 0);`
	`1035`	`+ } else if (version == VERSION_FLUX_CONTROLS) {`
	`1036`	`+ GGML_ASSERT(c_concat != NULL);`
	`1037`	`+`
	`1038`	`+ ggml_tensor* control = ggml_pad(ctx, c_concat, pad_w, pad_h, 0, 0);`
	`1039`	`+`
	`1040`	`+ control = patchify(ctx, control, patch_size);`
	`1041`	`+`
	`1042`	`+ img = ggml_concat(ctx, img, control, 0);`
`1035`	`1043`	`}`
`1036`	`1044`
`1037`	`1045`	`if (ref_latents.size() > 0) {`
`@@ -1079,6 +1087,8 @@ namespace Flux {`
`1079`	`1087`	`flux_params.depth_single_blocks = 0;`
`1080`	`1088`	`if (version == VERSION_FLUX_FILL) {`
`1081`	`1089`	`flux_params.in_channels = 384;`
	`1090`	`+ } else if (version == VERSION_FLUX_CONTROLS) {`
	`1091`	`+ flux_params.in_channels = 128;`
`1082`	`1092`	`} else if (version == VERSION_FLEX_2) {`
`1083`	`1093`	`flux_params.in_channels = 196;`
`1084`	`1094`	`}`

`‎model.cpp‎`

Lines changed: 4 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -1685,10 +1685,12 @@ SDVersion ModelLoader::get_sd_version() {`
`1685`	`1685`	`}`
`1686`	`1686`
`1687`	`1687`	`if (is_flux) {`
`1688`		`- is_inpaint = input_block_weight.ne[0] == 384;`
`1689`		`- if (is_inpaint) {`
	`1688`	`+ if (input_block_weight.ne[0] == 384) {`
`1690`	`1689`	`return VERSION_FLUX_FILL;`
`1691`	`1690`	`}`
	`1691`	`+ if (input_block_weight.ne[0] == 128) {`
	`1692`	`+ return VERSION_FLUX_CONTROLS;`
	`1693`	`+ }`
`1692`	`1694`	`if(input_block_weight.ne[0] == 196){`
`1693`	`1695`	`return VERSION_FLEX_2;`
`1694`	`1696`	`}`

`‎model.h‎`

Lines changed: 12 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -31,12 +31,13 @@ enum SDVersion {`
`31`	`31`	`VERSION_SD3,`
`32`	`32`	`VERSION_FLUX,`
`33`	`33`	`VERSION_FLUX_FILL,`
	`34`	`+ VERSION_FLUX_CONTROLS,`
`34`	`35`	`VERSION_FLEX_2,`
`35`	`36`	`VERSION_COUNT,`
`36`	`37`	`};`
`37`	`38`
`38`	`39`	`static inline bool sd_version_is_flux(SDVersion version) {`
`39`		`- if (version == VERSION_FLUX \|\| version == VERSION_FLUX_FILL \|\| version == VERSION_FLEX_2 ) {`
	`40`	`+ if (version == VERSION_FLUX \|\| version == VERSION_FLUX_FILL \|\| version == VERSION_FLUX_CONTROLS \|\| version == VERSION_FLEX_2 ) {`
`40`	`41`	`return true;`
`41`	`42`	`}`
`42`	`43`	`return false;`
`@@ -70,15 +71,16 @@ static inline bool sd_version_is_sdxl(SDVersion version) {`
`70`	`71`	`return false;`
`71`	`72`	`}`
`72`	`73`
`73`		`-static inline bool sd_version_is_inpaint(SDVersion version) {`
`74`		`- if (version == VERSION_SD1_INPAINT \|\| version == VERSION_SD2_INPAINT \|\| version == VERSION_SDXL_INPAINT \|\| version == VERSION_FLUX_FILL \|\| version == VERSION_FLEX_2) {`
	`74`	`+`
	`75`	`+static inline bool sd_version_is_dit(SDVersion version) {`
	`76`	`+ if (sd_version_is_flux(version) \|\| sd_version_is_sd3(version)) {`
`75`	`77`	`return true;`
`76`	`78`	`}`
`77`	`79`	`return false;`
`78`	`80`	`}`
`79`	`81`
`80`		`-static inline bool sd_version_is_dit(SDVersion version) {`
`81`		`- if (sd_version_is_flux(version) \|\| sd_version_is_sd3(version)) {`
	`82`	`+static inline bool sd_version_is_inpaint(SDVersion version) {`
	`83`	`+ if (version == VERSION_SD1_INPAINT \|\| version == VERSION_SD2_INPAINT \|\| version == VERSION_SDXL_INPAINT \|\| version == VERSION_FLUX_FILL \|\| version == VERSION_FLEX_2) {`
`82`	`84`	`return true;`
`83`	`85`	`}`
`84`	`86`	`return false;`
`@@ -88,8 +90,12 @@ static inline bool sd_version_is_edit(SDVersion version) {`
`88`	`90`	`return version == VERSION_SD1_PIX2PIX \|\| version == VERSION_SDXL_PIX2PIX;`
`89`	`91`	`}`
`90`	`92`
	`93`	`+static inline bool sd_version_is_control(SDVersion version) {`
	`94`	`+ return version == VERSION_FLUX_CONTROLS \|\| version == VERSION_FLEX_2;`
	`95`	`+}`
	`96`	`+`
`91`	`97`	`static bool sd_version_use_concat(SDVersion version) {`
`92`		`- return sd_version_is_edit(version) \|\| sd_version_is_inpaint(version);`
	`98`	`+ return sd_version_is_edit(version) \|\| sd_version_is_inpaint(version)\|\| sd_version_is_control(version);`
`93`	`99`	`}`
`94`	`100`
`95`	`101`	`enum PMVersion {`

`‎stable-diffusion.cpp‎`

Lines changed: 29 additions & 24 deletions

Original file line number	Diff line number	Diff line change
`@@ -314,7 +314,7 @@ class StableDiffusionGGML {`
`314`	`314`	`// TODO: shift_factor`
`315`	`315`	`}`
`316`	`316`
`317`		`- if(version == VERSION_FLEX_2){`
	`317`	`+ if (sd_version_is_control(version)) {`
`318`	`318`	`// Might need vae encode for control cond`
`319`	`319`	`vae_decode_only = false;`
`320`	`320`	`}`
`@@ -840,7 +840,7 @@ class StableDiffusionGGML {`
`840`	`840`	`int start_merge_step,`
`841`	`841`	`SDCondition id_cond,`
`842`	`842`	`std::vector<ggml_tensor*> ref_latents = {},`
`843`		`- ggml_tensor* denoise_mask = nullptr) {`
	`843`	`+ ggml_tensor* denoise_mask = nullptr) {`
`844`	`844`	`std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);`
`845`	`845`
`846`	`846`	`// TODO (Pix2Pix): separate image guidance params (right now it's reusing distilled guidance)`
`@@ -1512,6 +1512,17 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,`
`1512`	`1512`	`int W = width / 8;`
`1513`	`1513`	`int H = height / 8;`
`1514`	`1514`	`LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]);`
	`1515`	`+`
	`1516`	`+ struct ggml_tensor* control_latent = NULL;`
	`1517`	`+ if (sd_version_is_control(sd_ctx->sd->version) && image_hint != NULL) {`
	`1518`	`+ if (!sd_ctx->sd->use_tiny_autoencoder) {`
	`1519`	`+ struct ggml_tensor* control_moments = sd_ctx->sd->encode_first_stage(work_ctx, image_hint);`
	`1520`	`+ control_latent = sd_ctx->sd->get_first_stage_encoding(work_ctx, control_moments);`
	`1521`	`+ } else {`
	`1522`	`+ control_latent = sd_ctx->sd->encode_first_stage(work_ctx, image_hint);`
	`1523`	`+ }`
	`1524`	`+ }`
	`1525`	`+`
`1515`	`1526`	`if (sd_version_is_inpaint(sd_ctx->sd->version)) {`
`1516`	`1527`	`int64_t mask_channels = 1;`
`1517`	`1528`	`if (sd_ctx->sd->version == VERSION_FLUX_FILL) {`
`@@ -1544,50 +1555,44 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,`
`1544`	`1555`	`}`
`1545`	`1556`	`}`
`1546`	`1557`	`}`
`1547`		`- if (sd_ctx->sd->version == VERSION_FLEX_2 && image_hint != NULL && sd_ctx->sd->control_net == NULL) {`
	`1558`	`+`
	`1559`	`+ if (sd_ctx->sd->version == VERSION_FLEX_2 && control_latent != NULL && sd_ctx->sd->control_net == NULL) {`
`1548`	`1560`	`bool no_inpaint = concat_latent == NULL;`
`1549`	`1561`	`if (no_inpaint) {`
`1550`	`1562`	`concat_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, init_latent->ne[0], init_latent->ne[1], mask_channels + init_latent->ne[2], 1);`
`1551`	`1563`	`}`
`1552`	`1564`	`// fill in the control image here`
`1553`		`- struct ggml_tensor* control_latents = NULL;`
`1554`		`- if (!sd_ctx->sd->use_tiny_autoencoder) {`
`1555`		`- struct ggml_tensor* control_moments = sd_ctx->sd->encode_first_stage(work_ctx, image_hint);`
`1556`		`- control_latents = sd_ctx->sd->get_first_stage_encoding(work_ctx, control_moments);`
`1557`		`- } else {`
`1558`		`- control_latents = sd_ctx->sd->encode_first_stage(work_ctx, image_hint);`
`1559`		`- }`
`1560`		`- for (int64_t x = 0; x < concat_latent->ne[0]; x++) {`
`1561`		`- for (int64_t y = 0; y < concat_latent->ne[1]; y++) {`
	`1565`	`+ for (int64_t x = 0; x < control_latent->ne[0]; x++) {`
	`1566`	`+ for (int64_t y = 0; y < control_latent->ne[1]; y++) {`
`1562`	`1567`	`if (no_inpaint) {`
`1563`		`- for (int64_t c = 0; c < concat_latent->ne[2] - control_latents->ne[2]; c++) {`
	`1568`	`+ for (int64_t c = 0; c < concat_latent->ne[2] - control_latent->ne[2]; c++) {`
`1564`	`1569`	`// 0x16,1x1,0x16`
`1565`	`1570`	`ggml_tensor_set_f32(concat_latent, c == init_latent->ne[2], x, y, c);`
`1566`	`1571`	`}`
`1567`	`1572`	`}`
`1568`		`- for (int64_t c = 0; c < control_latents->ne[2]; c++) {`
`1569`		`- float v = ggml_tensor_get_f32(control_latents, x, y, c);`
`1570`		`- ggml_tensor_set_f32(concat_latent, v, x, y, concat_latent->ne[2] - control_latents->ne[2] + c);`
	`1573`	`+ for (int64_t c = 0; c < control_latent->ne[2]; c++) {`
	`1574`	`+ float v = ggml_tensor_get_f32(control_latent, x, y, c);`
	`1575`	`+ ggml_tensor_set_f32(concat_latent, v, x, y, concat_latent->ne[2] - control_latent->ne[2] + c);`
`1571`	`1576`	`}`
`1572`	`1577`	`}`
`1573`	`1578`	`}`
`1574`		`- // Disable controlnet`
`1575`		`- image_hint = NULL;`
`1576`	`1579`	`} else if (concat_latent == NULL) {`
`1577`	`1580`	`concat_latent = empty_latent;`
`1578`	`1581`	`}`
`1579`	`1582`	`cond.c_concat = concat_latent;`
`1580`	`1583`	`uncond.c_concat = empty_latent;`
`1581`	`1584`	`denoise_mask = NULL;`
`1582`		`- } else if (sd_version_is_edit(sd_ctx->sd->version)) {`
	`1585`	`+ } else if (sd_version_is_edit(sd_ctx->sd->version) \|\| sd_version_is_control(sd_ctx->sd->version)) {`
`1583`	`1586`	`auto empty_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, init_latent->ne[0], init_latent->ne[1], init_latent->ne[2], init_latent->ne[3]);`
`1584`	`1587`	`ggml_set_f32(empty_latent, 0);`
`1585`	`1588`	`uncond.c_concat = empty_latent;`
	`1589`	`+ if (sd_version_is_control(sd_ctx->sd->version) && control_latent != NULL && sd_ctx->sd->control_net == NULL) {`
	`1590`	`+ concat_latent = control_latent;`
	`1591`	`+ }`
`1586`	`1592`	`if (concat_latent == NULL) {`
`1587`	`1593`	`concat_latent = empty_latent;`
`1588`	`1594`	`}`
`1589`		`- cond.c_concat = concat_latent;`
`1590`		`-`
	`1595`	`+ cond.c_concat = concat_latent;`
`1591`	`1596`	`}`
`1592`	`1597`	`for (int b = 0; b < batch_count; b++) {`
`1593`	`1598`	`int64_t sampling_start = ggml_time_ms();`
`@@ -1870,7 +1875,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,`
`1870`	`1875`	`ggml_tensor* masked_latent = NULL;`
`1871`	`1876`	`if (!sd_ctx->sd->use_tiny_autoencoder) {`
`1872`	`1877`	`ggml_tensor* moments = sd_ctx->sd->encode_first_stage(work_ctx, masked_img);`
`1873`		`- masked_latent = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments);`
	`1878`	`+ masked_latent = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments);`
`1874`	`1879`	`} else {`
`1875`	`1880`	`masked_latent = sd_ctx->sd->encode_first_stage(work_ctx, masked_img);`
`1876`	`1881`	`}`
`@@ -1941,8 +1946,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,`
`1941`	`1946`	`} else {`
`1942`	`1947`	`concat_latent = init_latent;`
`1943`	`1948`	`}`
`1944`		`- }`
`1945`		`-`
	`1949`	`+ }`
	`1950`	`+`
`1946`	`1951`	`{`
`1947`	`1952`	`// LOG_WARN("Inpainting with a base model is not great");`
`1948`	`1953`	`denoise_mask = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width / 8, height / 8, 1, 1);`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit d1d7420

File tree

4 files changed

4 files changed

`‎flux.hpp‎`

`‎model.cpp‎`

`‎model.h‎`

`‎stable-diffusion.cpp‎`

0 commit comments