12#ifndef __BOXES_BLOOM_FILTER_HPP__ 
   13#define __BOXES_BLOOM_FILTER_HPP__ 
   15#include "compiler.hpp" 
   25namespace boxes::filter::bloom {
 
   34template <
typename KeyT, hash::HashFamily64 HashT> 
class Bloom {
 
   35  using Base = uint64_t;
 
   36  using Bitmap = std::vector<Base>;
 
   46      : bitmapSize{calcBitmapElems(m)}, bitmapBits{bitmapSize * baseSize()},
 
   47        h{std::move(h)}, bitmap(bitmapSize, 0) {}
 
   56    for (std::size_t i = 0; i < 
k(); ++i) {
 
   57      const auto hResult = hash::hashType64(h, i, key);
 
   58      const auto idx = hResult % bitmapBits;
 
   74    for (std::size_t i = 0; i < 
k(); ++i) {
 
   75      const auto hResult = hash::hashType64(h, i, key);
 
   76      const auto idx = hResult % bitmapBits;
 
   89  std::size_t 
capacity() const BOXES_NOTHROW { 
return bitmapBits; }
 
   96  std::size_t 
k() const BOXES_NOTHROW { 
return h.size(); }
 
  103  std::size_t 
size() const BOXES_NOTHROW {
 
  104    const std::size_t x = 
count();
 
  105    return -1 * (bitmapBits / 
k()) * std::log(1 - x / bitmapBits);
 
  114    return std::accumulate(
 
  115        bitmap.begin(), bitmap.end(), 0,
 
  116        [](
const auto &a, 
const auto &b) { return a + std::popcount(b); });
 
  125    std::swap(bitmapSize, other.bitmapSize);
 
  126    std::swap(bitmapBits, other.bitmapBits);
 
  127    std::swap(h, other.h);
 
  128    bitmap.swap(other.bitmap);
 
  134  void clear() BOXES_NOTHROW { std::fill(bitmap.begin(), bitmap.end(), 0); }
 
  138  std::size_t bitmapSize;
 
  139  std::size_t bitmapBits;
 
  144  bool at(std::size_t index)
 const {
 
  145    const auto &res = std::div(
static_cast<long long>(index), baseSize());
 
  146    return bitmap.at(res.quot) & (
static_cast<Base
>(0x01) << res.rem);
 
  149  void set(std::size_t index) {
 
  150    const auto &res = std::div(
static_cast<long long>(index), baseSize());
 
  151    bitmap[res.quot] |= (
static_cast<Base
>(0x01) << res.rem);
 
  154  static constexpr std::size_t baseSize() { 
return sizeof(Base) * 8; }
 
  156  static constexpr std::size_t calcBitmapElems(std::size_t reqBits) {
 
  157    return (reqBits / baseSize()) + 1;
 
  172std::size_t 
calcSize(std::size_t maxElements, 
double collisionProb) {
 
  173  return static_cast<std::size_t
>(
 
  174      std::ceil(maxElements * std::log(collisionProb)) /
 
  175      std::log(1 / std::pow(2, std::log(2))));
 
  188std::size_t 
calcHashes(std::size_t bloomSize, std::size_t maxElements) {
 
  189  return std::round((
double)(std::log(2) * bloomSize) / maxElements);
 
  192template <
typename T, hash::Hash64 HT>
 
  193auto makeWithHashWithSeedFamily(std::size_t maxElements, 
double collisionProb) {
 
  194  using namespace boxes::hash;
 
  195  const auto m = calcSize(maxElements, collisionProb);
 
  196  const std::size_t k = calcHashes(m, maxElements);
 
  200template <
typename T, hash::Hash64 H1T, hash::Hash64 H2T>
 
  201auto makeWithKirschMitzenmacherFamily(std::size_t maxElements,
 
  202                                      double collisionProb, H1T h1, H2T h2) {
 
  203  using namespace boxes::hash;
 
  204  const auto m = 
calcSize(maxElements, collisionProb);
 
  205  const std::size_t k = 
calcHashes(m, maxElements);
 
  207  return Bloom<T, KirschMitzenmacher<H1T, H2T>>{
 
  208      static_cast<std::size_t
>(m),
 
std::size_t calcSize(std::size_t maxElements, double collisionProb)
Utility function to calculate the required size of the bloom filter's bitmap size to store the maximu...
 
std::size_t calcHashes(std::size_t bloomSize, std::size_t maxElements)
Utility function to calculate the number of hash functions required to store maximum number of elemen...
 
A generic bloom filter implementation.
 
bool contains(const KeyT &key) const
Checks if the filter contains the given key.
 
std::size_t count() const
Returns number of bits set in the filter's bitmap.
 
void insert(const KeyT &key)
Inserts key into the filter.
 
Bloom(std::size_t m, HashT h)
Instantiates a new bloom filter.
 
void clear() BOXES_NOTHROW
Clears filter's bitmap.
 
std::size_t k() const BOXES_NOTHROW
Number of hash functions the filter uses.
 
std::size_t capacity() const BOXES_NOTHROW
Returns number of bits in the filter.
 
void swap(Bloom &other)
Swaps the contents of this filter with another one.
 
std::size_t size() const BOXES_NOTHROW
Estimated number of elements in the filter.
 
Abstracts a hash family functor that requires a seed n times with different seed.
 
Contains utilities for instantiating hash functions and hash families.
 
KirschMitzenmacher< H1, H2 > makeKirschMitzenmacher(H1 h1, H2 h2, std::size_t k)
Helper function to instantiate a Kirsch-Mitzenmacher hash family.
 
Contains wrappers for various hash functions that can be used in Bloom filters.