libboxes
boxes is a set of specialised containers built on top of STL
All Classes Files Functions Pages Concepts
bloom_filter.hpp
Go to the documentation of this file.
1
12#ifndef __BOXES_BLOOM_FILTER_HPP__
13#define __BOXES_BLOOM_FILTER_HPP__
14
15#include "compiler.hpp"
16#include "hash_utils.hpp"
17#include "hashes.hpp"
18
19#include <algorithm>
20#include <bit>
21#include <cmath>
22#include <numeric>
23#include <vector>
24
25namespace boxes::filter::bloom {
26
34template <typename KeyT, hash::HashFamily64 HashT> class Bloom {
35 using Base = uint64_t;
36 using Bitmap = std::vector<Base>;
37
38public:
45 Bloom(std::size_t m, HashT h)
46 : bitmapSize{calcBitmapElems(m)}, bitmapBits{bitmapSize * baseSize()},
47 h{std::move(h)}, bitmap(bitmapSize, 0) {}
48
54 void insert(const KeyT &key) {
55 h.reset();
56 for (std::size_t i = 0; i < k(); ++i) {
57 const auto hResult = hash::hashType64(h, i, key);
58 const auto idx = hResult % bitmapBits;
59 set(idx);
60 }
61 }
62
72 bool contains(const KeyT &key) const {
73 h.reset();
74 for (std::size_t i = 0; i < k(); ++i) {
75 const auto hResult = hash::hashType64(h, i, key);
76 const auto idx = hResult % bitmapBits;
77 if (!at(idx)) {
78 return false;
79 }
80 }
81 return true;
82 }
83
89 std::size_t capacity() const BOXES_NOTHROW { return bitmapBits; }
90
96 std::size_t k() const BOXES_NOTHROW { return h.size(); }
97
103 std::size_t size() const BOXES_NOTHROW {
104 const std::size_t x = count();
105 return -1 * (bitmapBits / k()) * std::log(1 - x / bitmapBits);
106 }
107
113 std::size_t count() const {
114 return std::accumulate(
115 bitmap.begin(), bitmap.end(), 0,
116 [](const auto &a, const auto &b) { return a + std::popcount(b); });
117 }
118
124 void swap(Bloom &other) {
125 std::swap(bitmapSize, other.bitmapSize);
126 std::swap(bitmapBits, other.bitmapBits);
127 std::swap(h, other.h);
128 bitmap.swap(other.bitmap);
129 }
130
134 void clear() BOXES_NOTHROW { std::fill(bitmap.begin(), bitmap.end(), 0); }
135
136protected:
137 // non-const to allow for swapping
138 std::size_t bitmapSize;
139 std::size_t bitmapBits;
140
141 mutable HashT h;
142 Bitmap bitmap;
143
144 bool at(std::size_t index) const {
145 const auto &res = std::div(static_cast<long long>(index), baseSize());
146 return bitmap.at(res.quot) & (static_cast<Base>(0x01) << res.rem);
147 }
148
149 void set(std::size_t index) {
150 const auto &res = std::div(static_cast<long long>(index), baseSize());
151 bitmap[res.quot] |= (static_cast<Base>(0x01) << res.rem);
152 }
153
154 static constexpr std::size_t baseSize() { return sizeof(Base) * 8; }
155
156 static constexpr std::size_t calcBitmapElems(std::size_t reqBits) {
157 return (reqBits / baseSize()) + 1;
158 }
159};
160
172std::size_t calcSize(std::size_t maxElements, double collisionProb) {
173 return static_cast<std::size_t>(
174 std::ceil(maxElements * std::log(collisionProb)) /
175 std::log(1 / std::pow(2, std::log(2))));
176}
177
188std::size_t calcHashes(std::size_t bloomSize, std::size_t maxElements) {
189 return std::round((double)(std::log(2) * bloomSize) / maxElements);
190}
191
192template <typename T, hash::Hash64 HT>
193auto makeWithHashWithSeedFamily(std::size_t maxElements, double collisionProb) {
194 using namespace boxes::hash;
195 const auto m = calcSize(maxElements, collisionProb);
196 const std::size_t k = calcHashes(m, maxElements);
197 return Bloom<T, HashWithSeedFamily<HT>>{m, HashWithSeedFamily<HT>{k}};
198}
199
200template <typename T, hash::Hash64 H1T, hash::Hash64 H2T>
201auto makeWithKirschMitzenmacherFamily(std::size_t maxElements,
202 double collisionProb, H1T h1, H2T h2) {
203 using namespace boxes::hash;
204 const auto m = calcSize(maxElements, collisionProb);
205 const std::size_t k = calcHashes(m, maxElements);
206
207 return Bloom<T, KirschMitzenmacher<H1T, H2T>>{
208 static_cast<std::size_t>(m),
209 makeKirschMitzenmacher(std::move(h1), std::move(h2), k)};
210}
211
212} // namespace boxes::filter::bloom
213
214#endif // __BOXES_BLOOM_FILTER_HPP__
std::size_t calcSize(std::size_t maxElements, double collisionProb)
Utility function to calculate the required size of the bloom filter's bitmap size to store the maximu...
std::size_t calcHashes(std::size_t bloomSize, std::size_t maxElements)
Utility function to calculate the number of hash functions required to store maximum number of elemen...
A generic bloom filter implementation.
bool contains(const KeyT &key) const
Checks if the filter contains the given key.
std::size_t count() const
Returns number of bits set in the filter's bitmap.
void insert(const KeyT &key)
Inserts key into the filter.
Bloom(std::size_t m, HashT h)
Instantiates a new bloom filter.
void clear() BOXES_NOTHROW
Clears filter's bitmap.
std::size_t k() const BOXES_NOTHROW
Number of hash functions the filter uses.
std::size_t capacity() const BOXES_NOTHROW
Returns number of bits in the filter.
void swap(Bloom &other)
Swaps the contents of this filter with another one.
std::size_t size() const BOXES_NOTHROW
Estimated number of elements in the filter.
Abstracts a hash family functor that requires a seed n times with different seed.
Definition: hash_utils.hpp:203
Contains utilities for instantiating hash functions and hash families.
KirschMitzenmacher< H1, H2 > makeKirschMitzenmacher(H1 h1, H2 h2, std::size_t k)
Helper function to instantiate a Kirsch-Mitzenmacher hash family.
Definition: hash_utils.hpp:254
Contains wrappers for various hash functions that can be used in Bloom filters.