I'm looking to improve on this bin-packing code (size, possible bugs). It seems to work better than many (more complex) algorithms, but maybe it's no good? I am using a pair<size_t,size_t> for object id/weight - I'm not so good at templates.
The need (context).
Several times a day, we need to migrate a set of digital assets from one system to another. The process uses memory (the assets are held in memory for a short time) and we need to do our best to keep memory usage down. Fortunately, we have some flexibility (there is no hard ceiling), but there is a noticeable performance improvement if we keep each migration batch to about 50 MB of assets at a time.
Each migration consists of about 15,000 digital assets, ranging from a hundred bytes or so up to well over 50 MB (though very rarely). A normal distribution fitted to our asset sizes has a mean (μ) of 213777.0 and a standard deviation (σ) of 1591525.0 - this isn't very accurate (there's a slight pull towards the low end, and then a few very big assets), but it's good enough.
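For test purposes, sizes of roughly this shape can be drawn from that fitted normal; this is a minimal sketch of the same approach the Addenda code below uses (the function name is mine, and taking the absolute value of negative draws is a crude stand-in for the real skew):
#include <cmath>
#include <cstddef>
#include <random>
// Draw one pseudo asset size (bytes) from the fitted distribution (μ = 213777.0, σ = 1591525.0).
std::size_t sample_asset_size(std::mt19937& gen) {
    static std::normal_distribution<> d{213777.0, 1591525.0};
    return static_cast<std::size_t>(std::round(std::abs(d(gen))));
}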
Each asset has a unique id and, along with some other metadata, we have its size available to us. Although we could use an 'asset' struct, for flexibility's sake (and because I am unused to templates) I chose a pair<size_t,size_t> to represent each asset - the first being the id, the second being the actual size of the asset (size_t is suitable for both). I know a struct would be more suitable, and will make the change as suggested below.
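For reference, a minimal sketch of what that asset struct might look like (the field names are mine, not from the deployed code):
#include <cstddef>
// Hypothetical replacement for std::pair<size_t,size_t>.
struct Asset {
    std::size_t id;   // unique asset id   (pair.first)
    std::size_t size; // asset size, bytes (pair.second)
};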
Therefore, it seemed reasonable (and still does) to use a bin-packing solution (a 1-D knapsack-style problem).
The criteria of assessment.
The calculation speed of the bin-packing solution is not very important (we are looking at only 15k assets), and neither is a perfectly optimal packing. The primary criteria were ease of understanding and ease of use. Some of our juniors have never heard of bin-packing, and a reasonably easy-to-read method with few lines is more important than a very generalised, maximally optimal, super-fast solution.
The search.
Wikipedia, StackOverflow, and Google are always the first places to look, of course; and bin-packing is a very well-addressed area.
From these, I found the code in Bastian Rieck's bin-packing GitHub repo, specifically the max_rest_pq function, which uses a priority queue.
While it worked fine, I am not familiar with std::priority_queue, and it also seemed to store only the size of each bin, not its contents. I chose to use a pair<size_t,size_t> for asset-id/weight - I'm not so good at templates, but structs will probably be used in the deployed solution.
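For contrast, the priority-queue idea looks roughly like this - a sketch of a max-rest (worst-fit) pass that only counts bins; it is not Bastian's actual code, the function name is mine, and it does not remember which assets went where:
#include <cstddef>
#include <queue>
#include <vector>
// Each entry in the queue is the space remaining in one bin; the largest is always on top.
std::size_t count_bins_max_rest(const std::vector<std::size_t>& weights, std::size_t K) {
    std::priority_queue<std::size_t> rest;
    for (auto w : weights) {
        if (!rest.empty() && rest.top() >= w) {
            auto space = rest.top();
            rest.pop();
            rest.push(space - w);           // drop the item into the emptiest bin
        } else {
            rest.push(w >= K ? 0 : K - w);  // nothing fits; open a new bin
        }
    }
    return rest.size();
}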
The novelty(?!)
Instead, I chose to replace Bastian's std::priority_queue with a std::multimap, exploiting its lower_bound() method (the map's key here represents the space still available in a bin, not the size of its current fill). Likewise, it uses extract()/insert() in place of the pop()/push() found in std::priority_queue.
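In isolation, the trick amounts to the following (a minimal sketch with hypothetical names, restating the core of the make_bins loop shown below):
#include <cstddef>
#include <map>
#include <utility>
#include <vector>
using Item = std::pair<std::size_t, std::size_t>;            // {id, weight}
using Bins = std::multimap<std::size_t, std::vector<Item>>;  // remaining space -> items in that bin
// Place one item into the first bin that still has room, or report that none fits.
bool place_in_existing_bin(Bins& bins, const Item& item) {
    auto bin = bins.lower_bound(item.second);     // first bin with at least item.second space left
    if (bin == bins.end()) return false;          // no bin fits; the caller opens a new one
    auto node = bins.extract(bin);                // C++17 node handle; the vector of items moves with it
    node.key() -= item.second;                    // the key (remaining space) is mutable on an extracted node
    node.mapped().push_back(item);
    bins.insert(std::move(node));                 // re-insert at the bin's new position
    return true;
}
For example, with K = 500 and weights 480, 30, 20, 400, 90 arriving in that order, make_bins below produces three bins: {480, 20}, {30, 400} and {90}.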
Why I asked this question.
While I may be an experienced programmer, I am not a good programmer. I'm looking to improve on the solution I wrote.
It seems to work well against many other (more complex, lengthy) solutions, but maybe it's no good.
The code as originally presented..
#include <vector>
#include <iostream>
#include <map>
#include <unordered_map>
// The multimap key is the space remaining in a bin; the mapped vector holds the {id, size} pairs packed into it.
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> make_bins(const std::unordered_map<size_t,size_t>& objects, size_t K) {
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> bins;
for(auto& item: objects) { //ID-Weight
if (item.second >= K) {
bins.insert({0,{item}}); // oversized item gets its own bin, marked as full (0 space left)
} else {
auto bin = bins.lower_bound(item.second); // first existing bin with at least item.second space remaining
if (bin != bins.end()) {
auto node = bins.extract(bin);
node.key() = node.key() - item.second;
node.mapped().push_back(item);
bins.insert(std::move(node));
} else {
bins.insert({K-item.second,{item}}); // no existing bin fits; open a new one
}
}
}
return bins;
}
// Test data..
int main() {
size_t max_capacity = 500;
std::vector<size_t> obj = {2,42,2,32,21,32,19,2,4,2,4,5,2,5,6,6,96,37,34,54,80,55,84,20,74,50,56,95,40,93,28,37,17,101,28,82,55,58,42,101,29,54,88,73,4,37,22,25,71,93,99,51,14,95,82,90,99,66,63,58,14,73,7,6,98,63,60,79,60,49,91,58,68,52,6,51,69,82,36,71,7,28,88,42,80,81,42,32,52,93,53,3,20,15,8,211,91,52,38,46,79,60,76,86,22,50,101,70,92,43,27,6,33,19,15,30,99,87,52,59,38,92,71,85,32,76,21,10,82,96,61,30,9,75,39,14,6,31,28,75,61,33,85,42,2,41,43,64,3,68,60,77,39,61,63,38,25,66,93,30,75,71,31,23,67,20,93,2,4,45,51,81,23,25,27,2,17,66,17,32,26,31,35,54,2,5,65,51,31,84,42,36,2,50,46,22,53,50,84,84,65,51,72,54,99,46,90,44,60,2,40,38,80,26,95,2,94,56,66,31,25,18,89,42,59,3,86,50,97,18,58,79,100,32,82,94,66,87,61,32,85,87,48,76,7,24,33,19,64,15,60,10,47,7,44,80,100,72,39,61,17,83,48,100,79,52,20,26,66,50,64,26,44,85,22,68,62,72,9,16,2,35,35,14,15,9,8,33,93,50,21,30,75,51,64,40,27,23,34,83,29,35,58,17,81,7,40,43,62,35,10,121,95,30,92,71,16,16,43,16,76,40,33,6,26,23,68,66,80,92,101,52,11,60,71,18,65,11,42,14,5,49,2,89,80,23,121,5,9,53,58,23,2,10,98,19,29,38,91,57,51,9,40,76,62,96,83,35,96,64,4,46,40,5,28,35,26,57,101,78,63,59,3,68,61,23,61,101,70,76,37,74,46,43,30,66,32,73,22,6,49,33,23,91,111,39,76,98,7,78,72,50,43,92,56,15};
std::unordered_map<size_t,size_t> u;
size_t x = 1002;
for (auto &i : obj) {
u[x++] = i;
}
auto bins = make_bins(u,max_capacity); // 69,99
for (auto& bin: bins) {
size_t wt = 0;
std::cout << max_capacity - bin.first << ": [";
for (auto& i: bin.second) {
wt += i.second;
std::cout << '{' << i.first << ',' << i.second << '}';
}
std::cout << "]\n";
}
return 0;
}
Addenda...
The test above is not very accurate, and does not reflect real usage. Instead, following the thoughts offered by g-sliepen below, I modified the method to take a vector input, in order to test sorted (heaviest-first) weights, and I also improved the sample generation.
#include <vector>
#include <iostream>
#include <map>
#include <unordered_map>
#include <cmath>
#include <random>
#include <algorithm> // std::sort
#include <cstdint>   // SIZE_MAX
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> make_bins(const std::vector<std::pair<size_t,size_t>>& objects, size_t K) {
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> bins;
for(auto& item: objects) { //ID-Weight
if (item.second >= K) {
bins.insert({0,{item}});
} else {
auto bin = bins.lower_bound(item.second); // bin has space.
if (bin != bins.end()) {
auto node = bins.extract(bin);
node.key() = node.key() - item.second;
node.mapped().push_back(item);
bins.insert(std::move(node));
} else {
bins.insert({K-item.second,{item}});
}
}
}
return bins;
}
int main() {
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<> d{213777.0,1591525.0};
size_t count = 15000;
std::vector<size_t> max_capacities = {500000,5000000,50000000,500000000,5000000000};
size_t min_bins=SIZE_MAX, max_bins=0;
for (auto& max_capacity: max_capacities) {
for (size_t a = 0; a < 2; a++) {
for (size_t t = 0; t < 100; t++) { // try this 100 times
std::unordered_map<size_t,size_t> u;
size_t x = 1002;
for (size_t i = 0; i < count; ++i) {
u[x++] = std::round(std::abs(d(gen)));
}
std::vector<std::pair<size_t,size_t>> v;
for (auto& item: u) {
v.push_back(item);
}
if (a == 0) {
struct { bool operator()(std::pair<size_t,size_t> a, std::pair<size_t,size_t> b) const { return b.second < a.second; } } pr_gte; // orders heaviest first
std::sort(v.begin(), v.end(), pr_gte);
}
auto bins = make_bins(v,max_capacity);
min_bins = min_bins < bins.size() ? min_bins : bins.size();
max_bins = max_bins > bins.size() ? max_bins : bins.size();
}
std::cout << max_capacity << (a==0? " sorted: " : " unsorted: ") << min_bins << " – " << max_bins << std::endl;
min_bins=SIZE_MAX; max_bins=0;
}
}
std::cout << std::endl;
return 0;
}
I found that, as the maximum bin size increases, pre-sorted weights sometimes miss a trick and produce more bins than are strictly necessary. In general, though, sorting does give a slight edge - but not by much. My conclusion is that pre-sorting, for this purpose, is probably unnecessary.
across 100 iterations each..
bin_sz   mode       min_bins – max_bins
0.5MB    sorted:    13073 – 13253
0.5MB    unsorted:  13119 – 13281
5.0MB    sorted:    3792 – 3889
5.0MB    unsorted:  3807 – 3905
50.0MB   sorted:    378 – 390
50.0MB   unsorted:  379 – 390
500MB    sorted:    38 – 40
500MB    unsorted:  38 – 40
5000MB   sorted:    4 – 4
5000MB   unsorted:  4 – 4