Commit 35843c7

authored

fix: optimize the handling of embedding weight (#859)

1 parent 6ad46bb · commit 35843c7 (Copy full SHA for 35843c7)

File tree

+15

-5

lines changed

+15

-5

lines changed

Lines changed: 3 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -553,10 +553,9 @@ class CLIPEmbeddings : public GGMLBlock {`
`553`	`553`	`void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {`
`554`	`554`	`enum ggml_type token_wtype = GGML_TYPE_F32;`
`555`	`555`	`if (!force_clip_f32) {`
`556`		`- auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");`
`557`		`- std::set<ggml_type> allow_types = {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};`
`558`		`- if (tensor_type != tensor_types.end() && allow_types.find(tensor_type->second) != allow_types.end()) {`
`559`		`- token_wtype = tensor_type->second;`
	`556`	`+ token_wtype = get_type(prefix + "token_embedding.weight", tensor_types, GGML_TYPE_F32);`
	`557`	`+ if (!support_get_rows(token_wtype)) {`
	`558`	`+ token_wtype = GGML_TYPE_F32;`
`560`	`559`	`}`
`561`	`560`	`}`
`562`	`561`	`enum ggml_type position_wtype = GGML_TYPE_F32;`

Lines changed: 12 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1967,13 +1967,24 @@ class Linear : public UnaryBlock {`
`1967`	`1967`	`}`
`1968`	`1968`	`};`
`1969`	`1969`
	`1970`	`+__STATIC_INLINE__ bool support_get_rows(ggml_type wtype) {`
	`1971`	`+ std::set<ggml_type> allow_types = {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};`
	`1972`	`+ if (allow_types.find(wtype) != allow_types.end()) {`
	`1973`	`+ return true;`
	`1974`	`+ }`
	`1975`	`+ return false;`
	`1976`	`+}`
	`1977`	`+`
`1970`	`1978`	`class Embedding : public UnaryBlock {`
`1971`	`1979`	`protected:`
`1972`	`1980`	`int64_t embedding_dim;`
`1973`	`1981`	`int64_t num_embeddings;`
`1974`	`1982`	`void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {`
`1975`	`1983`	`enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);`
`1976`		`- params["weight"] = ggml_new_tensor_2d(ctx, wtype, embedding_dim, num_embeddings);`
	`1984`	`+ if (!support_get_rows(wtype)) {`
	`1985`	`+ wtype = GGML_TYPE_F32;`
	`1986`	`+ }`
	`1987`	`+ params["weight"] = ggml_new_tensor_2d(ctx, wtype, embedding_dim, num_embeddings);`
`1977`	`1988`	`}`
`1978`	`1989`
`1979`	`1990`	`public:`

Comments

(0)