Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c648001

Browse files
authored
feat: add detailed tensor loading time stat (#793)
1 parent c587a43 commit c648001

File tree

2 files changed

+50
-6
lines changed

2 files changed

+50
-6
lines changed

‎model.cpp‎

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
19661966
}
19671967

19681968
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
1969+
int64_t process_time_ms = 0;
1970+
int64_t read_time_ms = 0;
1971+
int64_t memcpy_time_ms = 0;
1972+
int64_t copy_to_backend_time_ms = 0;
1973+
int64_t convert_time_ms = 0;
1974+
1975+
int64_t prev_time_ms = 0;
1976+
int64_t curr_time_ms = 0;
1977+
int64_t start_time = ggml_time_ms();
1978+
prev_time_ms = start_time;
19691979
std::vector<TensorStorage> processed_tensor_storages;
19701980
for (auto& tensor_storage : tensor_storages) {
19711981
// LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
19781988
}
19791989
std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
19801990
processed_tensor_storages = dedup;
1991+
curr_time_ms = ggml_time_ms();
1992+
process_time_ms = curr_time_ms - prev_time_ms;
1993+
prev_time_ms = curr_time_ms;
19811994

19821995
bool success = true;
19831996
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20192032
size_t entry_size = zip_entry_size(zip);
20202033
if (entry_size != n) {
20212034
read_buffer.resize(entry_size);
2035+
prev_time_ms = ggml_time_ms();
20222036
zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
2037+
curr_time_ms = ggml_time_ms();
2038+
read_time_ms += curr_time_ms - prev_time_ms;
2039+
prev_time_ms = curr_time_ms;
20232040
memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
2041+
curr_time_ms = ggml_time_ms();
2042+
memcpy_time_ms += curr_time_ms - prev_time_ms;
20242043
} else {
2044+
prev_time_ms = ggml_time_ms();
20252045
zip_entry_noallocread(zip, (void*)buf, n);
2046+
curr_time_ms = ggml_time_ms();
2047+
read_time_ms += curr_time_ms - prev_time_ms;
20262048
}
20272049
zip_entry_close(zip);
20282050
} else {
2051+
prev_time_ms = ggml_time_ms();
20292052
file.seekg(tensor_storage.offset);
20302053
file.read(buf, n);
2054+
curr_time_ms = ggml_time_ms();
2055+
read_time_ms += curr_time_ms - prev_time_ms;
20312056
if (!file) {
20322057
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
20332058
return false;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20722097
read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
20732098
}
20742099

2100+
prev_time_ms = ggml_time_ms();
20752101
if (tensor_storage.is_bf16) {
20762102
// inplace op
20772103
bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20862112
} else if (tensor_storage.is_i64) {
20872113
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
20882114
}
2115+
curr_time_ms = ggml_time_ms();
2116+
convert_time_ms += curr_time_ms - prev_time_ms;
20892117
} else {
20902118
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
20912119
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
20922120

2121+
prev_time_ms = ggml_time_ms();
20932122
if (tensor_storage.is_bf16) {
20942123
// inplace op
20952124
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21092138

21102139
convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
21112140
dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
2141+
curr_time_ms = ggml_time_ms();
2142+
convert_time_ms += curr_time_ms - prev_time_ms;
21122143
}
21132144
} else {
21142145
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
21152146
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
21162147

2148+
prev_time_ms = ggml_time_ms();
21172149
if (tensor_storage.is_bf16) {
21182150
// inplace op
21192151
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21332165

21342166
if (tensor_storage.type == dst_tensor->type) {
21352167
// copy to device memory
2168+
curr_time_ms = ggml_time_ms();
2169+
convert_time_ms += curr_time_ms - prev_time_ms;
2170+
prev_time_ms = curr_time_ms;
21362171
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
2172+
curr_time_ms = ggml_time_ms();
2173+
copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
21372174
} else {
21382175
// convert first, then copy to device memory
21392176
convert_buffer.resize(ggml_nbytes(dst_tensor));
21402177
convert_tensor((void*)read_buffer.data(), tensor_storage.type,
21412178
(void*)convert_buffer.data(), dst_tensor->type,
21422179
(int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
2180+
curr_time_ms = ggml_time_ms();
2181+
convert_time_ms += curr_time_ms - prev_time_ms;
2182+
prev_time_ms = curr_time_ms;
21432183
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
2184+
curr_time_ms = ggml_time_ms();
2185+
copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
21442186
}
21452187
}
21462188
++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21702212
break;
21712213
}
21722214
}
2215+
int64_t end_time = ggml_time_ms();
2216+
LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
2217+
(end_time - start_time) / 1000.f,
2218+
process_time_ms / 1000.f,
2219+
read_time_ms / 1000.f,
2220+
memcpy_time_ms / 1000.f,
2221+
convert_time_ms / 1000.f,
2222+
copy_to_backend_time_ms / 1000.f);
21732223
return success;
21742224
}
21752225

‎stable-diffusion.cpp‎

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -557,8 +557,6 @@ class StableDiffusionGGML {
557557
// load weights
558558
LOG_DEBUG("loading weights");
559559

560-
int64_t t0 = ggml_time_ms();
561-
562560
std::set<std::string> ignore_tensors;
563561
tensors["alphas_cumprod"] = alphas_cumprod_tensor;
564562
if (use_tiny_autoencoder) {
@@ -656,11 +654,7 @@ class StableDiffusionGGML {
656654
ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
657655
}
658656

659-
int64_t t1 = ggml_time_ms();
660-
LOG_INFO("loading model from '%s' completed, taking %.2fs", SAFE_STR(sd_ctx_params->model_path), (t1 - t0) * 1.0f / 1000);
661-
662657
// check is_using_v_parameterization_for_sd2
663-
664658
if (sd_version_is_sd2(version)) {
665659
if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
666660
is_using_v_parameterization = true;

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /