12#ifndef __BOXES_BLOOM_FILTER_HPP__
13#define __BOXES_BLOOM_FILTER_HPP__
15#include "compiler.hpp"
25namespace boxes::filter::bloom {
34template <
typename KeyT, hash::HashFamily64 HashT>
class Bloom {
35 using Base = uint64_t;
36 using Bitmap = std::vector<Base>;
46 : bitmapSize{calcBitmapElems(m)}, bitmapBits{bitmapSize * baseSize()},
47 h{std::move(h)}, bitmap(bitmapSize, 0) {}
56 for (std::size_t i = 0; i <
k(); ++i) {
57 const auto hResult = hash::hashType64(h, i, key);
58 const auto idx = hResult % bitmapBits;
74 for (std::size_t i = 0; i <
k(); ++i) {
75 const auto hResult = hash::hashType64(h, i, key);
76 const auto idx = hResult % bitmapBits;
89 std::size_t
capacity() const BOXES_NOTHROW {
return bitmapBits; }
96 std::size_t
k() const BOXES_NOTHROW {
return h.size(); }
103 std::size_t
size() const BOXES_NOTHROW {
104 const std::size_t x =
count();
105 return -1 * (bitmapBits /
k()) * std::log(1 - x / bitmapBits);
114 return std::accumulate(
115 bitmap.begin(), bitmap.end(), 0,
116 [](
const auto &a,
const auto &b) { return a + std::popcount(b); });
125 std::swap(bitmapSize, other.bitmapSize);
126 std::swap(bitmapBits, other.bitmapBits);
127 std::swap(h, other.h);
128 bitmap.swap(other.bitmap);
134 void clear() BOXES_NOTHROW { std::fill(bitmap.begin(), bitmap.end(), 0); }
138 std::size_t bitmapSize;
139 std::size_t bitmapBits;
144 bool at(std::size_t index)
const {
145 const auto &res = std::div(
static_cast<long long>(index), baseSize());
146 return bitmap.at(res.quot) & (
static_cast<Base
>(0x01) << res.rem);
149 void set(std::size_t index) {
150 const auto &res = std::div(
static_cast<long long>(index), baseSize());
151 bitmap[res.quot] |= (
static_cast<Base
>(0x01) << res.rem);
154 static constexpr std::size_t baseSize() {
return sizeof(Base) * 8; }
156 static constexpr std::size_t calcBitmapElems(std::size_t reqBits) {
157 return (reqBits / baseSize()) + 1;
172std::size_t
calcSize(std::size_t maxElements,
double collisionProb) {
173 return static_cast<std::size_t
>(
174 std::ceil(maxElements * std::log(collisionProb)) /
175 std::log(1 / std::pow(2, std::log(2))));
188std::size_t
calcHashes(std::size_t bloomSize, std::size_t maxElements) {
189 return std::round((
double)(std::log(2) * bloomSize) / maxElements);
192template <
typename T, hash::Hash64 HT>
193auto makeWithHashWithSeedFamily(std::size_t maxElements,
double collisionProb) {
194 using namespace boxes::hash;
195 const auto m = calcSize(maxElements, collisionProb);
196 const std::size_t k = calcHashes(m, maxElements);
200template <
typename T, hash::Hash64 H1T, hash::Hash64 H2T>
201auto makeWithKirschMitzenmacherFamily(std::size_t maxElements,
202 double collisionProb, H1T h1, H2T h2) {
203 using namespace boxes::hash;
204 const auto m =
calcSize(maxElements, collisionProb);
205 const std::size_t k =
calcHashes(m, maxElements);
207 return Bloom<T, KirschMitzenmacher<H1T, H2T>>{
208 static_cast<std::size_t
>(m),
std::size_t calcSize(std::size_t maxElements, double collisionProb)
Utility function to calculate the required size of the bloom filter's bitmap size to store the maximu...
std::size_t calcHashes(std::size_t bloomSize, std::size_t maxElements)
Utility function to calculate the number of hash functions required to store maximum number of elemen...
A generic bloom filter implementation.
bool contains(const KeyT &key) const
Checks if the filter contains the given key.
std::size_t count() const
Returns number of bits set in the filter's bitmap.
void insert(const KeyT &key)
Inserts key into the filter.
Bloom(std::size_t m, HashT h)
Instantiates a new bloom filter.
void clear() BOXES_NOTHROW
Clears filter's bitmap.
std::size_t k() const BOXES_NOTHROW
Number of hash functions the filter uses.
std::size_t capacity() const BOXES_NOTHROW
Returns number of bits in the filter.
void swap(Bloom &other)
Swaps the contents of this filter with another one.
std::size_t size() const BOXES_NOTHROW
Estimated number of elements in the filter.
Abstracts a hash family functor that requires a seed n times with different seed.
Contains utilities for instantiating hash functions and hash families.
KirschMitzenmacher< H1, H2 > makeKirschMitzenmacher(H1 h1, H2 h2, std::size_t k)
Helper function to instantiate a Kirsch-Mitzenmacher hash family.
Contains wrappers for various hash functions that can be used in Bloom filters.