6
\$\begingroup\$

So, I'm coding a simple Bloom filter. Current implementation works as expected, but I'm looking for ways to improve efficiency / speed. The code is being tested on a large chunk of data (~30k+ lines), so every little bit counts.

Also: Is there any way I can initialize a vector only one time, not using resize()?

set() computes number of hashes and size of bit array
add() adds element to filter
search() gives an approximate answer, if filter contains element or not
print() prints bit array

main() mostly contains parsing

Here is the test example: https://pastebin.com/3edmX2vH

Here is the code itself:

#include <cmath>
#include <vector>
#include <sstream>
const unsigned long M = 2147483647; 
const unsigned long long Primes[] =
{
 0, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 
 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 
 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 263, 269, 271, 277, 281
};
template <class T>
class Filter 
{
 std::vector<bool> bits;
 size_t size;
 size_t numHashes;
 size_t getHash(const T& key,const size_t& hashIndex);
public:
 Filter() {};
 ~Filter() {};
 bool Set(const int& n, const double& p);
 void Add(const T& key);
 void Print(); 
 bool Search(const T& key);
};
template <class T>
bool Filter<T>::Set(const int& n, const double& prob)
{
 size = round((-n * log2(prob)) / log(2));
 numHashes = round(-log2(prob));
 if (size == 0 || numHashes == 0)
 {
 std::cout << "error" << std::endl;
 return false;
 }
 else
 {
 bits.resize(size);
 std::cout << size << " " << numHashes << std::endl;
 return true;
 }
}
template <class T>
void Filter<T>::Add(const T& key)
{
 for (size_t i = 0; i < numHashes; i++)
 {
 size_t index = getHash(key, i);
 bits[index] = true; 
 }
}
template <class T>
bool Filter<T>::Search(const T& key)
{
 for (size_t i = 0; i < numHashes; i++)
 {
 size_t index = getHash(key, i);
 if (!bits[index]) 
 {
 return false; 
 }
 }
 return true;
}
template <class T>
void Filter<T>::Print()
{
 for (size_t i = 0; i < size; i++)
 {
 std::cout << bits[i];
 }
 std:: cout << std::endl;
}
template <class T>
size_t Filter<T>::getHash(const T& key, const size_t& hashIndex)
{
 return (((hashIndex + 1) * (key % M) + Primes[hashIndex + 1]) % M) % size;
}
int main()
{
 Filter<uint64_t> filter;
 std::string command, line;
 uint64_t element;
 double prob;
 bool setFlag = false;
 bool empty = false; 
 while (std::getline(std::cin, line))
 {
 std::istringstream is(line);
 if (is.rdbuf()->in_avail() == 0)
 {
 empty = true;
 }
 command.clear();
 is >> command >> element >> prob;
 if (command == "set")
 {
 if (!setFlag && element != 0 && prob != 0 && prob < 1)
 {
 setFlag = filter.Set(element, prob);
 }
 else
 {
 std::cout << "error" << std::endl;
 }
 }
 else if (command == "add")
 {
 is >> element;
 if (is.rdbuf()->in_avail() == 0 && setFlag)
 {
 filter.Add(element);
 }
 else
 {
 std::cout << "error" << std::endl;
 }
 }
 else if (command == "search")
 {
 is >> element;
 if (is.rdbuf()->in_avail() == 0 && setFlag)
 {
 std::cout << filter.Search(element) <<std::endl;
 }
 else
 {
 std::cout << "error" << std::endl;
 }
 }
 else if (command == "print" && setFlag)
 {
 if (is.rdbuf()->in_avail() == 0)
 {
 filter.Print();
 }
 else
 {
 std::cout << "error" << std::endl;
 }
 }
 else
 {
 if (!empty)
 std::cout << "error" << std::endl;
 }
 }
 return 0;
}
asked Oct 23, 2019 at 14:30
\$\endgroup\$

2 Answers 2

1
\$\begingroup\$

The idiom for setting a vector contained in a class is

Filter::Filter( int s ) : bits( vector<bool>(s) ) { ... };

In your case s is somewhat tricky, so you'll have to write a function for it.

answered Oct 23, 2019 at 15:40
\$\endgroup\$
1
\$\begingroup\$

Some minor things that will improve just a bit, you should precomputed the value of log(2) and put as a constant if is not precomputed by the compiler, and the increments on your loops should be ++i, instead i++

answered Oct 23, 2019 at 20:04
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.