libboxes
boxes is a set of specialised containers built on top of STL
hash_utils.hpp
Go to the documentation of this file.
1
8#ifndef __BOXES_HASH_UTILS_HPP__
9#define __BOXES_HASH_UTILS_HPP__
10
11#include "compiler.hpp"
12
13#include <concepts>
14#include <cstdint>
15#include <random>
16#include <set>
17#include <string>
18#include <vector>
19
20namespace boxes::hash {
21
33template <typename T>
34concept Hash64 = requires(T t, const uint8_t *data, std::size_t len) {
35 { t(data, len) };
36 { t.final() } -> std::same_as<uint64_t>;
37 { t.reset() };
38};
39
56template <typename T>
57concept HashFamily64 =
58 requires(T t, std::size_t i, const uint8_t *data, std::size_t len) {
59 { t(i, data, len) };
60 { t.final(i) } -> std::same_as<uint64_t>;
61 { t.reset() };
62 { t.size() } -> std::same_as<std::size_t>;
63 };
64
78template <typename HashFamilyT, typename T>
79uint64_t hashType64(HashFamilyT &h, std::size_t idx, const T &t) {
80 h(idx, reinterpret_cast<const uint8_t *>(&t), sizeof(T));
81 return h.final(idx);
82}
83
95template <typename HashFamilyT>
96uint64_t hashType64(HashFamilyT &h, std::size_t idx, const std::string &s) {
97 h(idx, reinterpret_cast<const uint8_t *>(s.data()), s.size());
98 return h.final(idx);
99}
100
111template <typename HashFamilyT, typename T>
112uint64_t hashType64(HashFamilyT &h, std::size_t idx, const std::vector<T> &v) {
113 h(reinterpret_cast<const uint8_t *>(v.data()), v.size() * sizeof(T));
114 return h.final(idx);
115}
116
126template <typename HashFamilyT>
127uint64_t hashType64(HashFamilyT &h, std::size_t idx,
128 const std::vector<std::string> &v) {
129 for (const auto &s : v) {
130 h(reinterpret_cast<const uint8_t *>(s.data()), s.size());
131 }
132 return h.final(idx);
133}
134
142template <Hash64 HT> HT makeHashWithRandomSeed() {
143 std::random_device rd;
144 std::mt19937 gen(rd());
145 std::uniform_int_distribution<uint64_t> dis;
146 return HT{dis(gen)};
147}
148
159template <Hash64 H1, Hash64 H2> class KirschMitzenmacher {
160public:
172 explicit KirschMitzenmacher(H1 h1, H2 h2, std::size_t k)
173 : h1{std::move(h1)}, h2{std::move(h2)}, k{k} {}
174
175 void reset() {
176 h1.reset();
177 h2.reset();
178 }
179
180 void operator()(std::size_t idx, const uint8_t *data, std::size_t len) {
181 if (idx == 0) {
182 h1(data, len);
183 h2(data, len);
184 }
185 }
186
187 uint64_t final(std::size_t i) const { return h1.final() + i * h2.final(); }
188
189 std::size_t size() const BOXES_NOTHROW { return k; }
190
191protected:
192 H1 h1;
193 H2 h2;
194 std::size_t k;
195};
196
203template <Hash64 HashT> class HashWithSeedFamily {
204public:
211 explicit HashWithSeedFamily(std::size_t n) {
212 std::random_device rd;
213 std::mt19937 gen(rd());
214 std::uniform_int_distribution<uint64_t> dis;
215 std::set<uint64_t> seeds;
216
217 for (std::size_t i = 0; i < n; ++i) {
218 uint64_t seed = 0;
219 do {
220 // make sure all seeds are unique
221 seed = dis(gen);
222 } while (!seeds.insert(seed).second);
223 hashes.emplace_back(dis(gen));
224 }
225 }
226
227 void reset() {
228 for (auto &h : hashes) {
229 h.reset();
230 }
231 }
232
233 std::size_t size() const BOXES_NOTHROW { return hashes.size(); }
234
235 void operator()(std::size_t idx, const uint8_t *data, std::size_t len) {
236 hashes.at(idx)(data, len);
237 }
238
239 uint64_t final(std::size_t i) const { return hashes.at(i).final(); }
240
241protected:
242 std::vector<HashT> hashes;
243};
244
253template <Hash64 H1, Hash64 H2>
255 return KirschMitzenmacher<H1, H2>{std::move(h1), std::move(h2), k};
256}
257
258} // namespace boxes::hash
259
260#endif // __BOXES_HASH_UTILS_HPP__
Abstracts a hash family functor that requires a seed n times with different seed.
Definition: hash_utils.hpp:203
HashWithSeedFamily(std::size_t n)
Defines a hash family functor that uses a given hash functor requiring a seed and instantiates it n t...
Definition: hash_utils.hpp:211
Abstracts a Kirsch-Mitzenmacher hash family functor.
Definition: hash_utils.hpp:159
KirschMitzenmacher(H1 h1, H2 h2, std::size_t k)
Abstracts a Kirsch-Mitzenmacher hash family functor.
Definition: hash_utils.hpp:172
Defines a minimal interface for Hashing functor.
Definition: hash_utils.hpp:34
Defines a minimal interface for HashFamily functor.
Definition: hash_utils.hpp:57
KirschMitzenmacher< H1, H2 > makeKirschMitzenmacher(H1 h1, H2 h2, std::size_t k)
Helper function to instantiate a Kirsch-Mitzenmacher hash family.
Definition: hash_utils.hpp:254
HT makeHashWithRandomSeed()
Instantiates a hash function that requires a seed with a randomly generated seed.
Definition: hash_utils.hpp:142
uint64_t hashType64(HashFamilyT &h, std::size_t idx, const T &t)
Applies a given hash family to a type T
Definition: hash_utils.hpp:79