I'm looking to improve on this bin-packing code (size, possible bugs). It seems to work better than many (more complex) algorithms, but maybe it's no good? I am using a pair<size_t,size_t> for object id/weight - I'm not so good at templates.
The need (context).
Several times a day, we need to migrate a set of digital assets from one system to another. The process uses memory (the assets are held in memory for a short time) and we need to do our best to keep memory usage down. Fortunately, we have some flexibility (there is no hard ceiling), but there is a noticeable performance improvement if we keep each migration batch to about 50 MB of assets at a time.
Each migration consists of about 15,000 digital assets, ranging from a hundred bytes or so up to well over 50 MB (though very rarely). A normal distribution fitted to our asset sizes has a mean (μ) of 213777.0 and a standard deviation (σ) of 1591525.0 - this isn't very accurate (there's a slight pull towards the low end, and then a few very big assets), but it's good enough.
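For test purposes, sizes of roughly this shape can be drawn from that fitted normal; this is a minimal sketch of the same approach the Addenda code below uses (the function name is mine, and taking the absolute value of negative draws is a crude stand-in for the real skew):
#include <cmath>
#include <cstddef>
#include <random>
// Draw one pseudo asset size (bytes) from the fitted distribution (μ = 213777.0, σ = 1591525.0).
std::size_t sample_asset_size(std::mt19937& gen) {
    static std::normal_distribution<> d{213777.0, 1591525.0};
    return static_cast<std::size_t>(std::round(std::abs(d(gen))));
}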
Each asset has a unique id and, along with some other metadata, we have its size available to us. Although we could use an 'asset' struct, for flexibility's sake (and because I am unused to templates) I chose a pair<size_t,size_t> to represent each asset - the first being the id, the second being the actual size of the asset (size_t is suitable for both). I know a struct would be more suitable, and will make the change as suggested below.
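For reference, a minimal sketch of what that asset struct might look like (the field names are mine, not from the deployed code):
#include <cstddef>
// Hypothetical replacement for std::pair<size_t,size_t>.
struct Asset {
    std::size_t id;   // unique asset id   (pair.first)
    std::size_t size; // asset size, bytes (pair.second)
};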
Therefore, it seemed reasonable (and still does) to use a bin-packing solution (a 1-D knapsack-style problem).
The criteria of assessment.
The calculation speed of the bin-packing solution is not very important (we are looking at only 15k assets), and neither is a perfectly optimal packing. The primary criteria were ease of understanding and ease of use. Some of our juniors have never heard of bin-packing, and a reasonably easy-to-read method with few lines is more important than a very generalised, maximally optimal, super-fast solution.
The search.
Wikipedia, StackOverflow, and Google are always the first places to look, of course; and bin-packing is a very well-addressed area.
From these, I found the code in Bastian Rieck's bin-packing GitHub repo, specifically the max_rest_pq function, which uses a priority queue.
While it worked fine, I am not familiar with std::priority_queue, and it also seemed to store only the size of each bin, not its contents. I chose to use a pair<size_t,size_t> for asset-id/weight - I'm not so good at templates, but structs will probably be used in the deployed solution.
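For contrast, the priority-queue idea looks roughly like this - a sketch of a max-rest (worst-fit) pass that only counts bins; it is not Bastian's actual code, the function name is mine, and it does not remember which assets went where:
#include <cstddef>
#include <queue>
#include <vector>
// Each entry in the queue is the space remaining in one bin; the largest is always on top.
std::size_t count_bins_max_rest(const std::vector<std::size_t>& weights, std::size_t K) {
    std::priority_queue<std::size_t> rest;
    for (auto w : weights) {
        if (!rest.empty() && rest.top() >= w) {
            auto space = rest.top();
            rest.pop();
            rest.push(space - w);           // drop the item into the emptiest bin
        } else {
            rest.push(w >= K ? 0 : K - w);  // nothing fits; open a new bin
        }
    }
    return rest.size();
}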
The novelty(?!)
Instead, I chose to replace Bastian's std::priority_queue with a std::multimap, exploiting its lower_bound() method (the map's key here represents the space still available in a bin, not the size of its current fill). Likewise, it uses extract()/insert() in place of the pop()/push() found in std::priority_queue.
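In isolation, the trick amounts to the following (a minimal sketch with hypothetical names, restating the core of the make_bins loop shown below):
#include <cstddef>
#include <map>
#include <utility>
#include <vector>
using Item = std::pair<std::size_t, std::size_t>;            // {id, weight}
using Bins = std::multimap<std::size_t, std::vector<Item>>;  // remaining space -> items in that bin
// Place one item into the first bin that still has room, or report that none fits.
bool place_in_existing_bin(Bins& bins, const Item& item) {
    auto bin = bins.lower_bound(item.second);     // first bin with at least item.second space left
    if (bin == bins.end()) return false;          // no bin fits; the caller opens a new one
    auto node = bins.extract(bin);                // C++17 node handle; the vector of items moves with it
    node.key() -= item.second;                    // the key (remaining space) is mutable on an extracted node
    node.mapped().push_back(item);
    bins.insert(std::move(node));                 // re-insert at the bin's new position
    return true;
}
For example, with K = 500 and weights 480, 30, 20, 400, 90 arriving in that order, make_bins below produces three bins: {480, 20}, {30, 400} and {90}.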
Why I asked this question.
While I may be an experienced programmer, I am not a good programmer. I'm looking to improve on the solution I wrote.
It seems to work well against many other (more complex, lengthy) solutions, but maybe it's no good.
The code as originally presented..
#include <vector>
#include <iostream>
#include <map>
#include <unordered_map>
// The multimap key is the space remaining in a bin; the mapped vector holds the {id, size} pairs packed into it.
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> make_bins(const std::unordered_map<size_t,size_t>& objects, size_t K) {
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> bins;
for(auto& item: objects) { //ID-Weight
if (item.second >= K) {
bins.insert({0,{item}}); // oversized item gets its own bin, marked as full (0 space left)
} else {
auto bin = bins.lower_bound(item.second); // first existing bin with at least item.second space remaining
if (bin != bins.end()) {
auto node = bins.extract(bin);
node.key() = node.key() - item.second;
node.mapped().push_back(item);
bins.insert(std::move(node));
} else {
bins.insert({K-item.second,{item}}); // no existing bin fits; open a new one
}
}
}
return bins;
}
// Test data..
int main() {
size_t max_capacity = 500;
std::vector<size_t> obj = {2,42,2,32,21,32,19,2,4,2,4,5,2,5,6,6,96,37,34,54,80,55,84,20,74,50,56,95,40,93,28,37,17,101,28,82,55,58,42,101,29,54,88,73,4,37,22,25,71,93,99,51,14,95,82,90,99,66,63,58,14,73,7,6,98,63,60,79,60,49,91,58,68,52,6,51,69,82,36,71,7,28,88,42,80,81,42,32,52,93,53,3,20,15,8,211,91,52,38,46,79,60,76,86,22,50,101,70,92,43,27,6,33,19,15,30,99,87,52,59,38,92,71,85,32,76,21,10,82,96,61,30,9,75,39,14,6,31,28,75,61,33,85,42,2,41,43,64,3,68,60,77,39,61,63,38,25,66,93,30,75,71,31,23,67,20,93,2,4,45,51,81,23,25,27,2,17,66,17,32,26,31,35,54,2,5,65,51,31,84,42,36,2,50,46,22,53,50,84,84,65,51,72,54,99,46,90,44,60,2,40,38,80,26,95,2,94,56,66,31,25,18,89,42,59,3,86,50,97,18,58,79,100,32,82,94,66,87,61,32,85,87,48,76,7,24,33,19,64,15,60,10,47,7,44,80,100,72,39,61,17,83,48,100,79,52,20,26,66,50,64,26,44,85,22,68,62,72,9,16,2,35,35,14,15,9,8,33,93,50,21,30,75,51,64,40,27,23,34,83,29,35,58,17,81,7,40,43,62,35,10,121,95,30,92,71,16,16,43,16,76,40,33,6,26,23,68,66,80,92,101,52,11,60,71,18,65,11,42,14,5,49,2,89,80,23,121,5,9,53,58,23,2,10,98,19,29,38,91,57,51,9,40,76,62,96,83,35,96,64,4,46,40,5,28,35,26,57,101,78,63,59,3,68,61,23,61,101,70,76,37,74,46,43,30,66,32,73,22,6,49,33,23,91,111,39,76,98,7,78,72,50,43,92,56,15};
std::unordered_map<size_t,size_t> u;
size_t x = 1002;
for (auto &i : obj) {
u[x++] = i;
}
auto bins = make_bins(u,max_capacity); // 69,99
for (auto& bin: bins) {
size_t wt = 0;
std::cout << max_capacity - bin.first << ": [";
for (auto& i: bin.second) {
wt += i.second;
std::cout << '{' << i.first << ',' << i.second << '}';
}
std::cout << "]\n";
}
return 0;
}
Addenda...
The test above is not very accurate, and does not reflect real usage. Instead, following the thoughts offered by g-sliepen below, I modified the method to take a vector input, in order to test sorted (heaviest-first) weights, and I also improved the sample generation.
#include <vector>
#include <iostream>
#include <map>
#include <unordered_map>
#include <cmath>
#include <random>
#include <algorithm> // std::sort
#include <cstdint>   // SIZE_MAX
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> make_bins(const std::vector<std::pair<size_t,size_t>>& objects, size_t K) {
std::multimap<size_t,std::vector<std::pair<size_t,size_t>>> bins;
for(auto& item: objects) { //ID-Weight
if (item.second >= K) {
bins.insert({0,{item}});
} else {
auto bin = bins.lower_bound(item.second); // bin has space.
if (bin != bins.end()) {
auto node = bins.extract(bin);
node.key() = node.key() - item.second;
node.mapped().push_back(item);
bins.insert(std::move(node));
} else {
bins.insert({K-item.second,{item}});
}
}
}
return bins;
}
int main() {
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<> d{213777.0,1591525.0};
size_t count = 15000;
std::vector<size_t> max_capacities = {500000,5000000,50000000,500000000,5000000000};
size_t min_bins=SIZE_MAX, max_bins=0;
for (auto& max_capacity: max_capacities) {
for (size_t a = 0; a < 2; a++) {
for (size_t t = 0; t < 100; t++) { // try this 100 times
std::unordered_map<size_t,size_t> u;
size_t x = 1002;
for (size_t i = 0; i < count; ++i) {
u[x++] = std::round(std::abs(d(gen)));
}
std::vector<std::pair<size_t,size_t>> v;
for (auto& item: u) {
v.push_back(item);
}
if (a == 0) {
struct { bool operator()(std::pair<size_t,size_t> a, std::pair<size_t,size_t> b) const { return b.second < a.second; } } pr_gte; // orders heaviest first
std::sort(v.begin(), v.end(), pr_gte);
}
auto bins = make_bins(v,max_capacity);
min_bins = min_bins < bins.size() ? min_bins : bins.size();
max_bins = max_bins > bins.size() ? max_bins : bins.size();
}
std::cout << max_capacity << (a==0? " sorted: " : " unsorted: ") << min_bins << " – " << max_bins << std::endl;
min_bins=SIZE_MAX; max_bins=0;
}
}
std::cout << std::endl;
return 0;
}
I found that, as the maximum bin size increases, pre-sorted weights sometimes miss a trick and produce more bins than are strictly necessary. In general, though, sorting does give a slight edge - but not by much. My conclusion is that pre-sorting, for this purpose, is probably unnecessary.
across 100 iterations each..
bin_sz   mode       min_bins – max_bins
0.5MB    sorted:    13073 – 13253
0.5MB    unsorted:  13119 – 13281
5.0MB    sorted:    3792 – 3889
5.0MB    unsorted:  3807 – 3905
50.0MB   sorted:    378 – 390
50.0MB   unsorted:  379 – 390
500MB    sorted:    38 – 40
500MB    unsorted:  38 – 40
5000MB   sorted:    4 – 4
5000MB   unsorted:  4 – 4