@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
1966
1966
}
1967
1967
1968
1968
bool ModelLoader::load_tensors (on_new_tensor_cb_t on_new_tensor_cb) {
1969
+ int64_t process_time_ms = 0 ;
1970
+ int64_t read_time_ms = 0 ;
1971
+ int64_t memcpy_time_ms = 0 ;
1972
+ int64_t copy_to_backend_time_ms = 0 ;
1973
+ int64_t convert_time_ms = 0 ;
1974
+
1975
+ int64_t prev_time_ms = 0 ;
1976
+ int64_t curr_time_ms = 0 ;
1977
+ int64_t start_time = ggml_time_ms ();
1978
+ prev_time_ms = start_time;
1969
1979
std::vector<TensorStorage> processed_tensor_storages;
1970
1980
for (auto & tensor_storage : tensor_storages) {
1971
1981
// LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
1978
1988
}
1979
1989
std::vector<TensorStorage> dedup = remove_duplicates (processed_tensor_storages);
1980
1990
processed_tensor_storages = dedup;
1991
+ curr_time_ms = ggml_time_ms ();
1992
+ process_time_ms = curr_time_ms - prev_time_ms;
1993
+ prev_time_ms = curr_time_ms;
1981
1994
1982
1995
bool success = true ;
1983
1996
for (size_t file_index = 0 ; file_index < file_paths_.size (); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
2019
2032
size_t entry_size = zip_entry_size (zip);
2020
2033
if (entry_size != n) {
2021
2034
read_buffer.resize (entry_size);
2035
+ prev_time_ms = ggml_time_ms ();
2022
2036
zip_entry_noallocread (zip, (void *)read_buffer.data (), entry_size);
2037
+ curr_time_ms = ggml_time_ms ();
2038
+ read_time_ms += curr_time_ms - prev_time_ms;
2039
+ prev_time_ms = curr_time_ms;
2023
2040
memcpy ((void *)buf, (void *)(read_buffer.data () + tensor_storage.offset ), n);
2041
+ curr_time_ms = ggml_time_ms ();
2042
+ memcpy_time_ms += curr_time_ms - prev_time_ms;
2024
2043
} else {
2044
+ prev_time_ms = ggml_time_ms ();
2025
2045
zip_entry_noallocread (zip, (void *)buf, n);
2046
+ curr_time_ms = ggml_time_ms ();
2047
+ read_time_ms += curr_time_ms - prev_time_ms;
2026
2048
}
2027
2049
zip_entry_close (zip);
2028
2050
} else {
2051
+ prev_time_ms = ggml_time_ms ();
2029
2052
file.seekg (tensor_storage.offset );
2030
2053
file.read (buf, n);
2054
+ curr_time_ms = ggml_time_ms ();
2055
+ read_time_ms += curr_time_ms - prev_time_ms;
2031
2056
if (!file) {
2032
2057
LOG_ERROR (" read tensor data failed: '%s'" , file_path.c_str ());
2033
2058
return false ;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
2072
2097
read_data (tensor_storage, (char *)dst_tensor->data , nbytes_to_read);
2073
2098
}
2074
2099
2100
+ prev_time_ms = ggml_time_ms ();
2075
2101
if (tensor_storage.is_bf16 ) {
2076
2102
// inplace op
2077
2103
bf16_to_f32_vec ((uint16_t *)dst_tensor->data , (float *)dst_tensor->data , tensor_storage.nelements ());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
2086
2112
} else if (tensor_storage.is_i64 ) {
2087
2113
i64_to_i32_vec ((int64_t *)read_buffer.data (), (int32_t *)dst_tensor->data , tensor_storage.nelements ());
2088
2114
}
2115
+ curr_time_ms = ggml_time_ms ();
2116
+ convert_time_ms += curr_time_ms - prev_time_ms;
2089
2117
} else {
2090
2118
read_buffer.resize (std::max (tensor_storage.nbytes (), tensor_storage.nbytes_to_read ()));
2091
2119
read_data (tensor_storage, (char *)read_buffer.data (), nbytes_to_read);
2092
2120
2121
+ prev_time_ms = ggml_time_ms ();
2093
2122
if (tensor_storage.is_bf16 ) {
2094
2123
// inplace op
2095
2124
bf16_to_f32_vec ((uint16_t *)read_buffer.data (), (float *)read_buffer.data (), tensor_storage.nelements ());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
2109
2138
2110
2139
convert_tensor ((void *)read_buffer.data (), tensor_storage.type , dst_tensor->data ,
2111
2140
dst_tensor->type , (int )tensor_storage.nelements () / (int )tensor_storage.ne [0 ], (int )tensor_storage.ne [0 ]);
2141
+ curr_time_ms = ggml_time_ms ();
2142
+ convert_time_ms += curr_time_ms - prev_time_ms;
2112
2143
}
2113
2144
} else {
2114
2145
read_buffer.resize (std::max (tensor_storage.nbytes (), tensor_storage.nbytes_to_read ()));
2115
2146
read_data (tensor_storage, (char *)read_buffer.data (), nbytes_to_read);
2116
2147
2148
+ prev_time_ms = ggml_time_ms ();
2117
2149
if (tensor_storage.is_bf16 ) {
2118
2150
// inplace op
2119
2151
bf16_to_f32_vec ((uint16_t *)read_buffer.data (), (float *)read_buffer.data (), tensor_storage.nelements ());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
2133
2165
2134
2166
if (tensor_storage.type == dst_tensor->type ) {
2135
2167
// copy to device memory
2168
+ curr_time_ms = ggml_time_ms ();
2169
+ convert_time_ms += curr_time_ms - prev_time_ms;
2170
+ prev_time_ms = curr_time_ms;
2136
2171
ggml_backend_tensor_set (dst_tensor, read_buffer.data (), 0 , ggml_nbytes (dst_tensor));
2172
+ curr_time_ms = ggml_time_ms ();
2173
+ copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
2137
2174
} else {
2138
2175
// convert first, then copy to device memory
2139
2176
convert_buffer.resize (ggml_nbytes (dst_tensor));
2140
2177
convert_tensor ((void *)read_buffer.data (), tensor_storage.type ,
2141
2178
(void *)convert_buffer.data (), dst_tensor->type ,
2142
2179
(int )tensor_storage.nelements () / (int )tensor_storage.ne [0 ], (int )tensor_storage.ne [0 ]);
2180
+ curr_time_ms = ggml_time_ms ();
2181
+ convert_time_ms += curr_time_ms - prev_time_ms;
2182
+ prev_time_ms = curr_time_ms;
2143
2183
ggml_backend_tensor_set (dst_tensor, convert_buffer.data (), 0 , ggml_nbytes (dst_tensor));
2184
+ curr_time_ms = ggml_time_ms ();
2185
+ copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
2144
2186
}
2145
2187
}
2146
2188
++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
2170
2212
break ;
2171
2213
}
2172
2214
}
2215
+ int64_t end_time = ggml_time_ms ();
2216
+ LOG_INFO (" loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)" ,
2217
+ (end_time - start_time) / 1000 .f ,
2218
+ process_time_ms / 1000 .f ,
2219
+ read_time_ms / 1000 .f ,
2220
+ memcpy_time_ms / 1000 .f ,
2221
+ convert_time_ms / 1000 .f ,
2222
+ copy_to_backend_time_ms / 1000 .f );
2173
2223
return success;
2174
2224
}
2175
2225
0 commit comments