Ribbon: initial (general) algorithms and basic unit test (#7491)

Summary:
This is intended as the first commit toward a near-optimal alternative to static Bloom filters for SSTs. Stefan Walzer and I have agreed upon the name "Ribbon" for a PHSF based on his linear system construction in "Efficient Gauss Elimination for Near-Quadratic Matrices with One Short Random Block per Row, with Applications" ("SGauss") and my much faster "on the fly" algorithm for Gaussian elimination (or for this linear system, "banding"), which can be faster than peeling while also more compact and flexible. See util/ribbon_alg.h for more detailed introduction and background. RIBBON = Rapid Incremental Boolean Banding ON-the-fly

This commit just adds generic (templatized) core algorithms and a basic unit test showing some features, including the ability to construct structures within 2.5% space overhead vs. information theoretic lower bound. (Compare to cache-local Bloom filter's ~50% space overhead -> ~30% reduction anticipated.) This commit does not include the storage scheme necessary to make queries fast, especially for filter queries, nor fractional "result bits", but there is some description already and those implementations will come soon. Nor does this commit add FilterPolicy support, for use in SST files, but that will also come soon.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7491

Reviewed By: jay-zhuang

Differential Revision: D24517954

Pulled By: pdillinger

fbshipit-source-id: 0119ee597e250d7e0edd38ada2ba50d755606fa7
This commit is contained in:
Peter Dillinger 2020-10-25 20:43:04 -07:00 committed by Facebook GitHub Bot
parent 6595267980
commit 25d54c799c
8 changed files with 1755 additions and 0 deletions

View File

@ -1178,6 +1178,7 @@ if(WITH_TESTS)
util/random_test.cc
util/rate_limiter_test.cc
util/repeatable_thread_test.cc
util/ribbon_test.cc
util/slice_test.cc
util/slice_transform_test.cc
util/timer_queue_test.cc

View File

@ -631,6 +631,7 @@ ifdef ASSERT_STATUS_CHECKED
sst_file_reader_test \
range_tombstone_fragmenter_test \
repeatable_thread_test \
ribbon_test \
skiplist_test \
slice_test \
sst_dump_test \
@ -708,6 +709,7 @@ TESTS_PLATFORM_DEPENDENT := \
io_posix_test \
hash_test \
random_test \
ribbon_test \
thread_local_test \
work_queue_test \
rate_limiter_test \
@ -1420,6 +1422,9 @@ hash_test: $(OBJ_DIR)/util/hash_test.o $(TEST_LIBRARY) $(LIBRARY)
random_test: $(OBJ_DIR)/util/random_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
ribbon_test: $(OBJ_DIR)/util/ribbon_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
option_change_migration_test: $(OBJ_DIR)/utilities/option_change_migration/option_change_migration_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)

View File

@ -1804,6 +1804,13 @@ ROCKS_TESTS = [
[],
[],
],
[
"ribbon_test",
"util/ribbon_test.cc",
"serial",
[],
[],
],
[
"sim_cache_test",
"utilities/simulator_cache/sim_cache_test.cc",

1
src.mk
View File

@ -495,6 +495,7 @@ TEST_MAIN_SOURCES = \
util/random_test.cc \
util/rate_limiter_test.cc \
util/repeatable_thread_test.cc \
util/ribbon_test.cc \
util/slice_test.cc \
util/slice_transform_test.cc \
util/timer_queue_test.cc \

View File

@ -40,6 +40,10 @@ struct Unsigned128 {
lo = lower;
hi = upper;
}
explicit operator uint64_t() { return lo; }
explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
};
inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
@ -210,6 +214,11 @@ inline int BitParity(Unsigned128 v) {
return BitParity(Lower64of128(v)) ^ BitParity(Upper64of128(v));
}
// Type trait whose `value` is true when T is Unsigned128 or when
// std::is_unsigned<T> holds, and false for every other type.
template <typename T>
struct IsUnsignedUpTo128
    : std::integral_constant<bool, std::is_same<T, Unsigned128>::value
                                       ? true
                                       : std::is_unsigned<T>::value> {};
inline void EncodeFixed128(char* dst, Unsigned128 value) {
EncodeFixed64(dst, Lower64of128(value));
EncodeFixed64(dst + 8, Upper64of128(value));

821
util/ribbon_alg.h Normal file
View File

@ -0,0 +1,821 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <array>
#include "util/math128.h"
namespace ROCKSDB_NAMESPACE {
namespace ribbon {
// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly)
//
// ribbon_alg.h: generic versions of core algorithms.
//
// Ribbon is a Perfect Hash Static Function construction useful as a compact
// static Bloom filter alternative. It combines (a) a boolean (GF(2)) linear
// system construction that approximates a Band Matrix with hashing,
// (b) an incremental, on-the-fly Gaussian Elimination algorithm that is
// remarkably efficient and adaptable at constructing an upper-triangular
// band matrix from a set of band-approximating inputs from (a), and
// (c) a storage layout that is fast and adaptable as a filter.
//
// Footnotes: (a) "Efficient Gauss Elimination for Near-Quadratic Matrices
// with One Short Random Block per Row, with Applications" by Stefan
// Walzer and Martin Dietzfelbinger ("DW paper")
// (b) developed by Peter C. Dillinger, though not the first on-the-fly
// GE algorithm. See "On the fly Gaussian Elimination for LT codes" by
// Bioglio, Grangetto, Gaeta, and Sereno.
// (c) TODO: not yet implemented here
//
// See ribbon_impl.h for high-level behavioral summary. This file focuses
// on the core design details.
//
// ######################################################################
// ################# PHSF -> static filter reduction ####################
//
// A Perfect Hash Static Function is a data structure representing a
// map from anything hashable (a "key") to values of some fixed size.
// Crucially, it is allowed to return garbage values for anything not in
// the original set of map keys, and it is a "static" structure: entries
// cannot be added or deleted after construction. PHSFs representing n
// mappings to b-bit values (assume uniformly distributed) require at least
// n * b bits to represent, or at least b bits per entry. We typically
// describe the compactness of a PHSF by typical bits per entry as some
// function of b. For example, the MWHC construction (k=3 "peeling")
// requires about 1.0222*b and a variant called Xor+ requires about
// 1.08*b + 0.5 bits per entry.
//
// With more hashing, a PHSF can over-approximate a set as a Bloom filter
// does, with no FN queries and predictable false positive (FP) query
// rate. Instead of the user providing a value to map each input key to,
// a hash function provides the value. Keys in the original set will
// return a positive membership query because the underlying PHSF returns
// the same value as hashing the key. When a key is not in the original set,
// the PHSF returns a "garbage" value, which is only equal to the key's
// hash with (false positive) probability 1 in 2^b.
//
// For a matching false positive rate, standard Bloom filters require
// 1.44*b bits per entry. Cache-local Bloom filters (like bloom_impl.h)
// require a bit more, around 1.5*b bits per entry. Thus, a Bloom
// alternative could save up to or nearly 1/3rd of memory and storage
// that RocksDB uses for SST (static) Bloom filters. (Memtable Bloom filter
// is dynamic.)
//
// Recommended reading:
// "Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters"
// by Graf and Lemire
// First three sections of "Fast Scalable Construction of (Minimal
// Perfect Hash) Functions" by Genuzio, Ottaviano, and Vigna
//
// ######################################################################
// ################## PHSF vs. hash table vs. Bloom #####################
//
// You can think of traditional hash tables and related filter variants
// such as Cuckoo filters as utilizing an "OR" construction: a hash
// function associates a key with some slots and the data is returned if
// the data is found in any one of those slots. The collision resolution
// is visible in the final data structure and requires extra information.
// For example, Cuckoo filter uses roughly 1.05b + 2 bits per entry, and
// Golomb-Rice code (aka "GCS") as little as b + 1.5. When the data
// structure associates each input key with data in one slot, the
// structure implicitly constructs a (near-)minimal (near-)perfect hash
// (MPH) of the keys, which requires at least 1.44 bits per key to
// represent. This is why approaches with visible collision resolution
// have a fixed + 1.5 or more in storage overhead per entry, often in
// addition to an overhead multiplier on b.
//
// By contrast Bloom filters utilize an "AND" construction: a query only
// returns true if all bit positions associated with a key are set to 1.
// There is no collision resolution, so Bloom filters do not suffer a
// fixed bits per entry overhead like the above structures.
//
// PHSFs typically use a bitwise XOR construction: the data you want is
// not in a single slot, but in a linear combination of several slots.
// For static data, this gives the best of "AND" and "OR" constructions:
// avoids the +1.44 or more fixed overhead by not approximating a MPH and
// can do much better than Bloom's 1.44 factor on b with collision
// resolution, which here is done ahead of time and invisible at query
// time.
//
// ######################################################################
// ######################## PHSF construction ###########################
//
// For a typical PHSF, construction is solving a linear system of
// equations, typically in GF(2), which is to say that values are boolean
// and XOR serves both as addition and subtraction. We can use matrices to
// represent the problem:
//
// C * S = R
// (n x m) (m x b) (n x b)
// where C = coefficients, S = solution, R = results
// and solving for S given C and R.
//
// Note that C and R each have n rows, one for each input entry for the
// PHSF. A row in C is given by a hash function on the PHSF input key,
// and the corresponding row in R is the b-bit value to associate with
// that input key. (In a filter, rows of R are given by another hash
// function on the input key.)
//
// On solving, the matrix S (solution) is the final PHSF data, as it
// maps any row from the original C to its corresponding desired result
// in R. We just have to hash our query inputs and compute a linear
// combination of rows in S.
//
// In theory, we could choose m = n and let a hash function associate
// each input key with random rows in C. A solution exists with high
// probability, and uses essentially minimum space, b bits per entry
// (because we set m = n) but this has terrible scaling, something
// like O(n^2) space and O(n^3) time during construction (Gaussian
// elimination) and O(n) query time. But computational efficiency is
// key, and the core of this is avoiding scanning all of S to answer
// each query.
//
// The traditional approach (MWHC, aka Xor filter) starts with setting
// only some small fixed number of columns (typically k=3) to 1 for each
// row of C, with remaining entries implicitly 0. This is implemented as
// three hash functions over [0,m), and S can be implemented as a
// vector of b-bit values. Now, a query only involves looking up k rows
// (values) in S and computing their bitwise XOR. Additionally, this
// construction can use a linear time algorithm called "peeling" for
// finding a solution in many cases of one existing, but peeling
// generally requires a larger space overhead factor in the solution
// (m/n) than is required with Gaussian elimination.
//
// Recommended reading:
// "Peeling Close to the Orientability Threshold: Spatial Coupling in
// Hashing-Based Data Structures" by Stefan Walzer
//
// ######################################################################
// ##################### Ribbon PHSF construction #######################
//
// Ribbon constructs coefficient rows essentially the same as in the
// Walzer/Dietzfelbinger paper cited above: for some chosen fixed width
// r (kCoeffBits in code), each key is hashed to a starting column in
// [0, m - r] (GetStart() in code) and an r-bit sequence of boolean
// coefficients (GetCoeffRow() in code). If you sort the rows by start,
// the C matrix would look something like this:
//
// [####00000000000000000000]
// [####00000000000000000000]
// [000####00000000000000000]
// [0000####0000000000000000]
// [0000000####0000000000000]
// [000000000####00000000000]
// [000000000####00000000000]
// [0000000000000####0000000]
// [0000000000000000####0000]
// [00000000000000000####000]
// [00000000000000000000####]
//
// where each # could be a 0 or 1, chosen uniformly by a hash function.
// (Except we typically set the start column value to 1.) This scheme
// uses hashing to approximate a band matrix, and it has a solution iff
// it reduces to an upper-triangular boolean r-band matrix, like this:
//
// [1###00000000000000000000]
// [01##00000000000000000000]
// [000000000000000000000000]
// [0001###00000000000000000]
// [000000000000000000000000]
// [000001##0000000000000000]
// [000000000000000000000000]
// [00000001###0000000000000]
// [000000001###000000000000]
// [0000000001##000000000000]
// ...
// [00000000000000000000001#]
// [000000000000000000000001]
//
// where we have expanded to an m x m matrix by filling with rows of
// all zeros as needed. As in Gaussian elimination, this form is ready for
// generating a solution through back-substitution.
//
// The awesome thing about the Ribbon construction (from the DW paper) is
// how row reductions keep each row representable as a start column and
// r coefficients, because row reductions are only needed when two rows
// have the same number of leading zero columns. Thus, the combination
// of those rows, the bitwise XOR of the r-bit coefficient rows, cancels
// out the leading 1s, so starts (at least) one column later and only
// needs (at most) r - 1 coefficients.
//
// ######################################################################
// ###################### Ribbon PHSF scalability #######################
//
// Although more practical detail is in ribbon_impl.h, it's worth
// understanding some of the overall benefits and limitations of the
// Ribbon PHSFs.
//
// High-end scalability is a primary issue for Ribbon PHSFs, because in
// a single Ribbon linear system with fixed r and fixed m/n ratio, the
// solution probability approaches zero as n approaches infinity.
// For a given n, solution probability improves with larger r and larger
// m/n.
//
// By contrast, peeling-based PHSFs have somewhat worse storage ratio
// or solution probability for small n (less than ~1000). This is
// especially true with spatial-coupling, where benefits are only
// notable for n on the order of 100k or 1m or more.
//
// To make best use of current hardware, r=128 seems to be closest to
// a "generally good" choice for Ribbon, at least in RocksDB where SST
// Bloom filters typically hold around 10-100k keys, and almost always
// less than 10m keys. r=128 ribbon has a high chance of encoding success
// (with first hash seed) when storage overhead is around 5% (m/n ~ 1.05)
// for roughly 10k - 10m keys in a single linear system. r=64 only scales
// up to about 10k keys with the same storage overhead. Construction and
// access times for r=128 are similar to r=64. r=128 tracks nearly
// twice as much data during construction, but in most cases we expect
// the scalability benefits of r=128 vs. r=64 to make it preferred.
//
// A natural approach to scaling Ribbon beyond ~10m keys is splitting
// (or "sharding") the inputs into multiple linear systems with their
// own hash seeds. This can also help to control peak memory consumption.
// TODO: much more to come
//
// ######################################################################
// #################### Ribbon on-the-fly banding #######################
//
// "Banding" is what we call the process of reducing the inputs to an
// upper-triangular r-band matrix ready for finishing a solution with
// back-substitution. Although the DW paper presents an algorithm for
// this ("SGauss"), the awesome properties of their construction enable
// an even simpler, faster, and more backtrackable algorithm. In simplest
// terms, the SGauss algorithm requires sorting the inputs by start
// columns, but it's possible to make Gaussian elimination resemble hash
// table insertion!
//
// The enhanced algorithm is based on these observations:
// - When processing a coefficient row with first 1 in column j,
// - If it's the first at column j to be processed, it can be part of
// the banding at row j. (And that decision is never overwritten, with
// no loss of generality!)
// - Else, it can be combined with existing row j and re-processed,
// which will look for a later "empty" row or reach "no solution".
//
// We call our banding algorithm "incremental" and "on-the-fly" because
// (like hash table insertion) we are "finished" after each input
// processed, with respect to all inputs processed so far. Although the
// band matrix is an intermediate step to the solution structure, we have
// eliminated intermediate steps and unnecessary data tracking for
// banding.
//
// Building on "incremental" and "on-the-fly", the banding algorithm is
// easily backtrackable because no (non-empty) rows are overwritten in
// the banding. Thus, if we want to "try" adding an additional set of
// inputs to the banding, we only have to record which rows were written
// in order to efficiently backtrack to our state before considering
// the additional set. (TODO: how this can mitigate scalability and
// reach sub-1% overheads)
//
// Like in a linear-probed hash table, as the occupancy approaches and
// surpasses 90-95%, collision resolution dominates the construction
// time. (Ribbon doesn't usually pay at query time; see solution
// storage below.) This means that we can speed up construction time
// by using a higher m/n ratio, up to negative returns around 1.2.
// At m/n ~= 1.2, which still saves memory substantially vs. Bloom
// filter's 1.5, construction speed (including back-substitution) is not
// far from sorting speed, but still a few times slower than cache-local
// Bloom construction speed.
//
// Back-substitution from an upper-triangular boolean band matrix is
// especially fast and easy. All the memory accesses are sequential or at
// least local, not random. If the number of result bits (b) is a
// compile-time constant, the back-substitution state can even be tracked
// in CPU registers. Regardless of the solution representation, we prefer
// column-major representation for tracking back-substitution state, as
// r (the band width) will typically be much larger than b (result bits
// or columns), so better to handle r-bit values b times (per solution
// row) than b-bit values r times.
//
// ######################################################################
// ##################### Ribbon solution storage ########################
//
// Row-major layout is typical for boolean (bit) matrices, including for
// MWHC (Xor) filters where a query combines k b-bit values, and k is
// typically smaller than b. Even for k=4 and b=2, at least k=4 random
// lookups are required regardless of layout.
//
// Ribbon PHSFs are quite different, however, because
// (a) all of the solution rows relevant to a query are within a single
// range of r rows, and
// (b) the number of solution rows involved (r/2 on average, or r if
// avoiding conditional accesses) is typically much greater than
// b, the number of solution columns.
//
// Row-major for Ribbon PHSFs therefore tends to incur undue CPU overhead
// by processing (up to) r entries of b bits each, where b is typically
// less than 10 for filter applications.
//
// Column-major layout has poor locality because of accessing up to b
// memory locations in different pages (and obviously cache lines). Note
// that negative filter queries do not typically need to access all
// solution columns, as they can return when a mismatch is found in any
// result/solution column. This optimization doesn't always pay off on
// recent hardware, where the penalty for unpredictable conditional
// branching can exceed the penalty for unnecessary work, but the
// optimization is essentially unavailable with row-major layout.
//
// The best compromise seems to be interleaving column-major on the small
// scale with row-major on the large scale. For example, let a solution
// "block" be r rows column-major encoded as b r-bit values in sequence.
// Each query accesses (up to) 2 adjacent blocks, which will typically
// span 1-3 cache lines in adjacent memory. We get very close to the same
// locality as row-major, but with much faster reconstruction of each
// result column, at least for filter applications where b is relatively
// small and negative queries can return early.
//
// ######################################################################
// ###################### Fractional result bits ########################
//
// Bloom filters have great flexibility that alternatives mostly do not
// have. One of those flexibilities is in utilizing any ratio of data
// structure bits per key. With a typical memory allocator like jemalloc,
// this flexibility can save roughly 10% of the filters' footprint in
// DRAM by rounding up and down filter sizes to minimize memory internal
// fragmentation (see optimize_filters_for_memory RocksDB option).
//
// At first glance, PHSFs only offer a whole number of bits per "slot"
// (m rather than number of keys n), but coefficient locality in the
// Ribbon construction makes fractional bits/key quite possible and
// attractive for filter applications.
//
// TODO: more detail
//
// ######################################################################
// ################### CODE: Ribbon core algorithms #####################
// ######################################################################
//
// These algorithms are templatized for genericity but near-maximum
// performance in a given application. The template parameters
// adhere to class/struct type concepts outlined below.
// Rough architecture for these algorithms:
//
// +-----------+ +---+ +-----------------+
// | AddInputs | --> | H | --> | BandingStorage |
// +-----------+ | a | +-----------------+
// | s | |
// | h | Back substitution
// | e | V
// +-----------+ | r | +-----------------+
// | Query Key | --> | | >+< | SolutionStorage |
// +-----------+ +---+ | +-----------------+
// V
// Query result
// Common to other concepts
// concept RibbonTypes {
// // An unsigned integer type for an r-bit subsequence of coefficients.
// // r (or kCoeffBits) is taken to be sizeof(CoeffRow) * 8, as it would
// // generally only hurt scalability to leave bits of CoeffRow unused.
// typename CoeffRow;
// // An unsigned integer type big enough to hold a result row (b bits,
// // or number of solution/result columns).
// // In many applications, especially filters, the number of result
// // columns is decided at run time, so ResultRow simply needs to be
// // big enough for the largest number of columns allowed.
// typename ResultRow;
// // An unsigned integer type sufficient for representing the number of
// // rows in the solution structure. (TODO: verify any extra needed?)
// typename Index;
// };
// ######################################################################
// ######################## Hashers and Banding #########################
// Hasher concepts abstract out hashing details.
// concept PhsfQueryHasher extends RibbonTypes {
// // Type for a lookup key, which is hashable.
// typename Key;
//
// // Type for hashed summary of a Key. uint64_t is recommended.
// typename Hash;
//
// // Compute a hash value summarizing a Key
// Hash GetHash(const Key &) const;
//
// // Given a hash value and a number of columns that can start an
// // r-sequence of coefficients (== m - r + 1), return the start
// // column to associate with that hash value. (Starts can be chosen
// // uniformly or "smash" extra entries into the beginning and end for
// // better utilization at those extremes of the structure. Details in
// // ribbon_impl.h)
// Index GetStart(Hash, Index num_starts) const;
//
// // Given a hash value, return the r-bit sequence of coefficients to
// // associate with it. It's generally OK if
// // sizeof(CoeffRow) > sizeof(Hash)
// // as long as the hash itself is not too prone to collisions for the
// // applications and the CoeffRow is generated uniformly from
// // available hash data, but relatively independent of the start.
// //
// // Must be non-zero, because that's required for a solution to exist
// // when mapping to non-zero result row. (Note: BandingAdd could be
// // modified to allow 0 coeff row if that only occurs with 0 result
// // row, which really only makes sense for filter implementation,
// // where both values are hash-derived. Or BandingAdd could reject 0
// // coeff row, forcing next seed, but that has potential problems with
// // generality/scalability.)
// CoeffRow GetCoeffRow(Hash) const;
// };
// concept FilterQueryHasher extends PhsfQueryHasher {
// // For building or querying a filter, this returns the expected
// // result row associated with a hashed input. For general PHSF,
// // this must return 0.
// //
// // Although not strictly required, there's a slightly better chance of
// // solver success if result row is masked down here to only the bits
// // actually needed.
// ResultRow GetResultRowFromHash(Hash) const;
// }
// concept BandingHasher extends FilterQueryHasher {
// // For a filter, this will generally be the same as Key.
// // For a general PHSF, it must either
// // (a) include a key and a result it maps to (e.g. in a std::pair), or
// // (b) GetResultRowFromInput looks up the result somewhere rather than
// // extracting it.
// typename AddInput;
//
// // Instead of requiring a way to extract a Key from an
// // AddInput, we require getting the hash of the Key part
// // of an AddInput, which is trivial if AddInput == Key.
// Hash GetHash(const AddInput &) const;
//
// // For building a non-filter PHSF, this extracts or looks up the result
// // row to associate with an input. For filter PHSF, this must return 0.
// ResultRow GetResultRowFromInput(const AddInput &) const;
//
// // Whether the solver can assume the lowest bit of GetCoeffRow is
// // always 1. When true, it should improve solver efficiency slightly.
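// // Must be usable in a constant expression (BandingAddRange reads it
// // into a constexpr and passes it as a template argument).
// static constexpr bool kFirstCoeffAlwaysOne;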
// }
// Abstract storage for the result of "banding" the inputs (Gaussian
// elimination to an upper-triangular boolean band matrix). Because the
// banding is an incremental / on-the-fly algorithm, this also represents
// all the intermediate state between input entries.
//
// concept BandingStorage extends RibbonTypes {
// // Tells the banding algorithm to prefetch memory associated with
// // the next input before processing the current input. Generally
// // recommended iff the BandingStorage doesn't easily fit in CPU
// // cache.
// bool UsePrefetch() const;
//
// // Prefetches (e.g. __builtin_prefetch) memory associated with a
// // slot index i.
// void Prefetch(Index i) const;
//
// // Returns a pointer to CoeffRow for slot index i.
// CoeffRow* CoeffRowPtr(Index i);
//
// // Returns a pointer to ResultRow for slot index i. (Gaussian row
// // operations involve both sides of the equation.)
// ResultRow* ResultRowPtr(Index i);
//
// // Returns the number of columns that can start an r-sequence of
// // coefficients, which is the number of slots minus r (kCoeffBits)
// // plus one. (m - r + 1)
// Index GetNumStarts() const;
// };
// Optional storage for backtracking data in banding a set of input
// entries. It exposes an array structure which will generally be
// used as a stack. It must be able to accommodate as many entries
// as are passed in as inputs to `BandingAddRange`.
//
// concept BacktrackStorage extends RibbonTypes {
// // If false, backtracking support will be disabled in the algorithm.
// // This should preferably be an inline compile-time constant function.
// bool UseBacktrack() const;
//
// // Records `to_save` as the `i`th backtrack entry
// void BacktrackPut(Index i, Index to_save);
//
// // Recalls the `i`th backtrack entry
// Index BacktrackGet(Index i) const;
// }
// Adds a single entry to BandingStorage (and optionally, BacktrackStorage),
// returning true if successful or false if solution is impossible with
// current hasher (and presumably its seed) and number of "slots" (solution
// or banding rows). (A solution is impossible when there is a linear
// dependence among the inputs that doesn't "cancel out".)
//
// Pre- and post-condition: the BandingStorage represents a band matrix
// ready for back substitution (row echelon form except for zero rows),
// augmented with result values such that back substitution would give a
// solution satisfying all the cr@start -> rr entries added.
// Tries to insert one equation into the banding held by `bs`: coefficient
// row `cr` beginning at column `start`, with result row `rr`.
//
// The coefficient row is kept normalized so that its lowest bit is 1 and
// lines up with the current slot. If that slot is empty (its stored
// coefficient row is 0), `cr` and `rr` are written there, the slot index is
// recorded with bts->BacktrackPut at position *backtrack_pos, *backtrack_pos
// is incremented, and true is returned. Otherwise the slot's stored row is
// XORed into (cr, rr) and the search continues at the column of the next set
// coefficient bit.
//
// If the coefficients cancel out to 0, nothing is stored and the return
// value is whether the accumulated result row is also 0.
//
// When kFirstCoeffAlwaysOne is false, `cr` must be non-zero on entry.
template <bool kFirstCoeffAlwaysOne, typename BandingStorage,
          typename BacktrackStorage>
bool BandingAdd(BandingStorage *bs, typename BandingStorage::Index start,
                typename BandingStorage::ResultRow rr,
                typename BandingStorage::CoeffRow cr, BacktrackStorage *bts,
                typename BandingStorage::Index *backtrack_pos) {
  using CoeffRow = typename BandingStorage::CoeffRow;
  using Index = typename BandingStorage::Index;

  Index slot = start;

  // Drops the low zero coefficients of `cr` and advances `slot` by the same
  // number of columns, so bit 0 of `cr` corresponds to `slot` again.
  const auto realign = [&slot, &cr]() {
    const int zeros = CountTrailingZeroBits(cr);
    slot += static_cast<Index>(zeros);
    cr >>= zeros;
  };

  if (kFirstCoeffAlwaysOne) {
    assert((cr & 1) == 1);
  } else {
    // Requires/asserts that cr != 0
    realign();
  }

  for (;;) {
    CoeffRow occupant = *(bs->CoeffRowPtr(slot));
    if (occupant == 0) {
      // Empty slot: claim it for this equation and log it for backtracking.
      *(bs->CoeffRowPtr(slot)) = cr;
      *(bs->ResultRowPtr(slot)) = rr;
      bts->BacktrackPut(*backtrack_pos, slot);
      ++*backtrack_pos;
      return true;
    }
    // Stored rows are always normalized to start with a 1 coefficient.
    assert((occupant & 1) == 1);
    // Row reduction: combine with the equation already stored in this slot.
    cr ^= occupant;
    rr ^= *(bs->ResultRowPtr(slot));
    if (cr == 0) {
      // Inconsistency or (less likely) redundancy
      break;
    }
    realign();
  }

  // All coefficients cancelled. That is a failure unless the result row also
  // cancelled to 0, e.g. because of a duplicate input or a stock hash
  // collision with the same result row. (For a filter, a stock hash collision
  // implies the same result row.) It can also happen when a full equation
  // equals the sum of other equations, which is very possible with a small
  // range of values for the result row.
  return rr == 0;
}
// Adds a range of entries to BandingStorage returning true if successful
// or false if solution is impossible with current hasher (and presumably
// its seed) and number of "slots" (solution or banding rows). (A solution
// is impossible when there is a linear dependence among the inputs that
// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs.
//
// An empty range (begin == end) succeeds immediately without touching the
// storage.
//
// If UseBacktrack in the BacktrackStorage, this function call rolls back
// to prior state on failure. If !UseBacktrack, some subset of the entries
// will have been added to the BandingStorage, so best considered to be in
// an indeterminate state.
//
// The loop shape is selected by bs->UsePrefetch(): either a plain
// one-entry-at-a-time loop, or a pipelined loop that hashes the next entry
// and calls bs->Prefetch() on its start slot before the current entry is
// added.
//
template <typename BandingStorage, typename BacktrackStorage,
typename BandingHasher, typename InputIterator>
bool BandingAddRange(BandingStorage *bs, BacktrackStorage *bts,
const BandingHasher &bh, InputIterator begin,
InputIterator end) {
using CoeffRow = typename BandingStorage::CoeffRow;
using Index = typename BandingStorage::Index;
using ResultRow = typename BandingStorage::ResultRow;
using Hash = typename BandingHasher::Hash;
static_assert(IsUnsignedUpTo128<CoeffRow>::value, "must be unsigned");
static_assert(IsUnsignedUpTo128<Index>::value, "must be unsigned");
static_assert(IsUnsignedUpTo128<ResultRow>::value, "must be unsigned");
// Forwarded to BandingAdd as a compile-time template argument.
constexpr bool kFCA1 = BandingHasher::kFirstCoeffAlwaysOne;
if (begin == end) {
// trivial
return true;
}
const Index num_starts = bs->GetNumStarts();
InputIterator cur = begin;
// Handed by pointer, together with bts, to every BandingAdd call; after a
// failure it bounds the rollback loop at the bottom of this function.
// NOTE(review): presumably BandingAdd advances it once per slot index it
// records through bts->BacktrackPut -- confirm against BandingAdd.
Index backtrack_pos = 0;
if (!bs->UsePrefetch()) {
// Simple version, no prefetch
for (;;) {
Hash h = bh.GetHash(*cur);
Index start = bh.GetStart(h, num_starts);
// The result row is the OR of the part carried by the input and the
// part derived from the hash.
ResultRow rr =
bh.GetResultRowFromInput(*cur) | bh.GetResultRowFromHash(h);
CoeffRow cr = bh.GetCoeffRow(h);
if (!BandingAdd<kFCA1>(bs, start, rr, cr, bts, &backtrack_pos)) {
// Fall through to the failure/rollback handling below.
break;
}
if ((++cur) == end) {
return true;
}
}
} else {
// Pipelined w/prefetch
// Prime the pipeline
Hash h = bh.GetHash(*cur);
Index start = bh.GetStart(h, num_starts);
ResultRow rr = bh.GetResultRowFromInput(*cur);
bs->Prefetch(start);
// Pipeline
// Invariant at the top of each iteration: h, start and rr describe the
// entry about to be added, with only the input-supplied result bits in rr;
// the hash-derived bits are OR'ed in here.
for (;;) {
rr |= bh.GetResultRowFromHash(h);
CoeffRow cr = bh.GetCoeffRow(h);
if ((++cur) == end) {
// Last entry: there is no successor to hash or prefetch.
if (!BandingAdd<kFCA1>(bs, start, rr, cr, bts, &backtrack_pos)) {
break;
}
return true;
}
// Hash the successor and request its start slot before adding the
// current entry.
Hash next_h = bh.GetHash(*cur);
Index next_start = bh.GetStart(next_h, num_starts);
ResultRow next_rr = bh.GetResultRowFromInput(*cur);
bs->Prefetch(next_start);
if (!BandingAdd<kFCA1>(bs, start, rr, cr, bts, &backtrack_pos)) {
break;
}
// Shift the successor into the "current" position.
h = next_h;
start = next_start;
rr = next_rr;
}
}
// failed; backtrack (if implemented)
if (bts->UseBacktrack()) {
// Walk the recorded positions newest-first and clear the coefficient row
// at each recorded slot index.
while (backtrack_pos > 0) {
--backtrack_pos;
Index i = bts->BacktrackGet(backtrack_pos);
*(bs->CoeffRowPtr(i)) = 0;
// Not required: *(bs->ResultRowPtr(i)) = 0;
}
}
return false;
}
// Convenience overload of BandingAddRange for callers that do not need
// rollback. Returns true if every entry in [begin, end) was added, or false
// if no solution is possible with the current hasher (and presumably its
// seed) and number of "slots" -- i.e. there is a linear dependence among
// the inputs that doesn't "cancel out". "InputIterator" iterates over
// AddInputs.
//
// On failure, some subset of the entries will have been added to the
// BandingStorage, so it is best considered to be in an indeterminate state.
//
template <typename BandingStorage, typename BandingHasher,
          typename InputIterator>
bool BandingAddRange(BandingStorage *bs, const BandingHasher &bh,
                     InputIterator begin, InputIterator end) {
  using Index = typename BandingStorage::Index;

  // A BacktrackStorage that reports backtracking as disabled and discards
  // everything it is asked to record. Reading back from it is a logic error.
  struct DisabledBacktrack {
    bool UseBacktrack() { return false; }
    void BacktrackPut(Index, Index) {}
    Index BacktrackGet(Index) {
      assert(false);
      return 0;
    }
  };

  DisabledBacktrack no_backtrack;
  return BandingAddRange(bs, &no_backtrack, bh, begin, end);
}
// ######################################################################
// ######################### Solution Storage ###########################
// Back-substitution and query algorithms unfortunately depend on some
// details of data layout in the final data structure ("solution"). Thus,
// there is no common SolutionStorage covering all the reasonable
// possibilities.
// ###################### SimpleSolutionStorage #########################
// SimpleSolutionStorage is for a row-major storage, typically with no
// unused bits in each ResultRow. This is mostly for demonstration
// purposes as the simplest solution storage scheme. It is relatively slow
// for filter queries.
// concept SimpleSolutionStorage extends RibbonTypes {
// void PrepareForNumStarts(Index num_starts);
// Index GetNumStarts() const;
// ResultRow Load(Index slot_num) const;
// void Store(Index slot_num, ResultRow data);
// };
// Back-substitution: turns a completed banding (BandingStorage) into a
// solution stored in a SimpleSolutionStorage.
//
// Rows are processed from the last slot down to slot 0. For each solution
// column the function keeps the most recently computed solution bits in a
// CoeffRow-sized window, which is all that is needed (together with the
// banding row) to compute the next solution bit of that column.
template <typename SimpleSolutionStorage, typename BandingStorage>
void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &ss) {
  using CoeffRow = typename BandingStorage::CoeffRow;
  using Index = typename BandingStorage::Index;
  using ResultRow = typename BandingStorage::ResultRow;

  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
  constexpr auto kResultBits = static_cast<Index>(sizeof(ResultRow) * 8U);

  // Column-major window of the solution matrix: one CoeffRow per solution
  // column, holding the solution bits of the rows just below the current one.
  std::array<CoeffRow, kResultBits> state;
  state.fill(0);

  const Index num_starts = ss.GetNumStarts();
  sss->PrepareForNumStarts(num_starts);
  const Index num_slots = num_starts + kCoeffBits - 1;

  // The row accessors of the banding are non-const, hence the cast; the
  // banding is only read here.
  BandingStorage &banding = const_cast<BandingStorage &>(ss);

  for (Index row = num_slots; row-- > 0;) {
    CoeffRow coeffs = *banding.CoeffRowPtr(row);
    ResultRow target = *banding.ResultRowPtr(row);
    ResultRow solution_row = 0;

    for (Index col = 0; col < kResultBits; ++col) {
      // Make room in the window for the bit of the current row (bit 0).
      CoeffRow window = state[col] << 1;
      // For a valid solution, the dot product of the solution column with
      // the coefficient row must equal the result bit of that column:
      //   BitParity(window & coeffs) == ((target >> col) & 1)
      // With bit 0 of the window still clear, the bit to place there is
      // whatever makes that equation hold.
      const bool bit =
          (BitParity(window & coeffs) ^ ((target >> col) & 1)) != 0;
      if (bit) {
        window |= CoeffRow{1};
        solution_row |= ResultRow{1} << col;
      }
      // The window now covers row `row` and the following kCoeffBits - 1
      // rows of this column.
      state[col] = window;
    }
    sss->Store(row, solution_row);
  }
}
// Shared core of the SimpleSolutionStorage queries, for a key that has
// already been hashed: XORs together the solution rows at
// start_slot + i for every bit i that is set in the coefficient row `cr`,
// and returns the accumulated row.
template <typename SimpleSolutionStorage>
typename SimpleSolutionStorage::ResultRow SimpleQueryHelper(
    typename SimpleSolutionStorage::Index start_slot,
    typename SimpleSolutionStorage::CoeffRow cr,
    const SimpleSolutionStorage &sss) {
  using CoeffRow = typename SimpleSolutionStorage::CoeffRow;
  using ResultRow = typename SimpleSolutionStorage::ResultRow;

  constexpr unsigned kCoeffBits = static_cast<unsigned>(sizeof(CoeffRow) * 8U);

  ResultRow accumulated = 0;
  for (unsigned bit = 0; bit != kCoeffBits; ++bit) {
    const bool selected = (static_cast<unsigned>(cr >> bit) & 1U) != 0;
    if (!selected) {
      // Rows with a zero coefficient do not contribute (and are not loaded).
      continue;
    }
    accumulated ^= sss.Load(start_slot + bit);
  }
  return accumulated;
}
// General PHSF query: hashes `key` with `hasher`, derives the start slot and
// coefficient row from that hash, and returns the row that
// SimpleQueryHelper computes from SimpleSolutionStorage `sss`.
template <typename SimpleSolutionStorage, typename PhsfQueryHasher>
typename SimpleSolutionStorage::ResultRow SimplePhsfQuery(
    const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher,
    const SimpleSolutionStorage &sss) {
  using Hash = typename PhsfQueryHasher::Hash;

  const Hash key_hash = hasher.GetHash(key);
  const auto start_slot = hasher.GetStart(key_hash, sss.GetNumStarts());
  const auto coeffs = hasher.GetCoeffRow(key_hash);
  return SimpleQueryHelper(start_slot, coeffs, sss);
}
// Filter query: returns true when the row computed for `key` from
// SimpleSolutionStorage `sss` equals the result row the hasher derives from
// the key's hash, and false otherwise.
template <typename SimpleSolutionStorage, typename FilterQueryHasher>
bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key,
                       const FilterQueryHasher &hasher,
                       const SimpleSolutionStorage &sss) {
  using Hash = typename FilterQueryHasher::Hash;
  using ResultRow = typename SimpleSolutionStorage::ResultRow;

  const Hash key_hash = hasher.GetHash(key);
  const ResultRow expected = hasher.GetResultRowFromHash(key_hash);
  const ResultRow stored =
      SimpleQueryHelper(hasher.GetStart(key_hash, sss.GetNumStarts()),
                        hasher.GetCoeffRow(key_hash), sss);
  return expected == stored;
}
// #################### InterleavedSolutionStorage ######################
// InterleavedSolutionStorage is row-major at a high level, for good
// locality, and column-major at a low level, for CPU efficiency
// especially in filter queries or relatively small number of result bits
// (== solution columns). The storage is a sequence of "blocks" where a
// block has one CoeffRow for each solution column.
// concept InterleavedSolutionStorage extends RibbonTypes {
// Index GetNumColumns() const;
// Index GetNumStarts() const;
// CoeffRow Load(Index block_num, Index column) const;
// void Store(Index block_num, Index column, CoeffRow data);
// };
// TODO: not yet implemented here (only in prototype code elsewhere)
} // namespace ribbon
} // namespace ROCKSDB_NAMESPACE

503
util/ribbon_impl.h Normal file
View File

@ -0,0 +1,503 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include "port/port.h" // for PREFETCH
#include "util/ribbon_alg.h"
namespace ROCKSDB_NAMESPACE {
namespace ribbon {
// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly)
//
// ribbon_impl.h: templated (parameterized) standard implementations
//
// Ribbon is a Perfect Hash Static Function construction useful as a compact
// static Bloom filter alternative. See ribbon_alg.h for core algorithms
// and core design details.
//
// TODO: more details on trade-offs and practical issues.
// Ribbon implementations in this file take these parameters, which must be
// provided in a class/struct type with members expressed in this concept:
// concept TypesAndSettings {
// // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have
// // the added constraint that Hash be equivalent to either uint32_t or
// // uint64_t.
// typename Hash;
// typename CoeffRow;
// typename ResultRow;
// typename Index;
// typename Key;
// static constexpr bool kFirstCoeffAlwaysOne;
//
// // An unsigned integer type for identifying a hash seed, typically
// // uint32_t or uint64_t.
// typename Seed;
//
// // When true, the PHSF implements a static filter, expecting just
// // keys as inputs for construction. When false, implements a general
// // PHSF and expects std::pair<Key, ResultRow> as inputs for
// // construction.
// static constexpr bool kIsFilter;
//
// // When true, adds a tiny bit more hashing logic on queries and
// // construction to improve utilization at the beginning and end of
// // the structure. Recommended when CoeffRow is only 64 bits (or
// // less), so typical num_starts < 10k.
// static constexpr bool kUseSmash;
//
// // A seedable stock hash function on Keys. All bits of Hash must
// // be reasonably high quality. XXH functions recommended, but
// // Murmur, City, Farm, etc. also work.
// //
// // If sequential seeds are not sufficiently independent for your
// // stock hash function, consider multiplying by a large odd constant.
// // If seed 0 is still undesirable, consider adding 1 before the
// // multiplication.
// static Hash HashFn(const Key &, Seed);
// };
// Compile-time selection of the AddInput type from a constexpr bool:
// AddInputSelector<Key, ResultRow, IsFilter>::T is Key when IsFilter is
// true and std::pair<Key, ResultRow> when it is false.
template <typename Key, typename ResultRow, bool IsFilter>
struct AddInputSelector {
  // Filter: construction inputs are bare keys.
  using T = Key;
};
template <typename Key, typename ResultRow>
struct AddInputSelector<Key, ResultRow, false /*IsFilter*/> {
  // General PHSF (not a filter): each key comes with its result row.
  using T = std::pair<Key, ResultRow>;
};
// To avoid writing 'typename' everywhere that we use types like 'Index'.
//
// Expands, in the scope where it is invoked, to aliases for CoeffRow,
// ResultRow, Index, Hash, Key and Seed taken from `TypesAndSettings`, the
// derived QueryInput and AddInput aliases (AddInput is chosen through
// AddInputSelector from TypesAndSettings::kIsFilter), and the constants
// kCoeffBits (bit width of CoeffRow) and kFirstCoeffAlwaysOne.
//
// The trailing static_assert mentions every introduced name so none of them
// is reported as unused, and it leaves the expansion without its final
// semicolon so that one must be written after the macro call.
#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \
using CoeffRow = typename TypesAndSettings::CoeffRow; \
using ResultRow = typename TypesAndSettings::ResultRow; \
using Index = typename TypesAndSettings::Index; \
using Hash = typename TypesAndSettings::Hash; \
using Key = typename TypesAndSettings::Key; \
using Seed = typename TypesAndSettings::Seed; \
\
/* Some more additions */ \
using QueryInput = Key; \
using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector< \
Key, ResultRow, TypesAndSettings::kIsFilter>::T; \
static constexpr auto kCoeffBits = \
static_cast<Index>(sizeof(CoeffRow) * 8U); \
\
/* Export to algorithm */ \
static constexpr bool kFirstCoeffAlwaysOne = \
TypesAndSettings::kFirstCoeffAlwaysOne; \
\
static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) + \
sizeof(Hash) + sizeof(Key) + sizeof(Seed) + \
sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits + \
kFirstCoeffAlwaysOne > \
0, \
"avoid unused warnings, semicolon expected after macro call")
// StandardHasher: A standard implementation of concepts RibbonTypes,
// PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h.
//
// This implementation should be suitable for most all practical purposes
// as it "behaves" across a wide range of settings, with little room left
// for improvement. The key functionality in this hasher is generating
// CoeffRows, starts, and (for filters) ResultRows, which could be ~150
// bits of data or more, from a modest hash of 64 or even just 32 bits, with
// enough uniformity and bitwise independence to be close to "the best you
// can do" with available hash information in terms of FP rate and
// compactness. (64 bits recommended and sufficient for PHSF practical
// purposes.)
template <class TypesAndSettings>
class StandardHasher {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  // Creates a hasher that uses `seed` (0 unless given) for all hashing
  // until the seed is changed with NextSeed() or ResetSeed().
  StandardHasher(Seed seed = 0) : seed_(seed) {}

  // Seeded stock hash of a key, delegated to TypesAndSettings::HashFn.
  inline Hash GetHash(const Key& key) const {
    return TypesAndSettings::HashFn(key, seed_);
  }

  // Overload for when AddInput == pair<Key, ResultRow> (kIsFilter == false):
  // only the key half of the pair is hashed.
  inline Hash GetHash(const std::pair<Key, ResultRow>& bi) const {
    return GetHash(bi.first);
  }

  // Maps a hash to a start slot in [0, num_starts).
  //
  // This is "critical path" code because it is required before the memory
  // lookup. FastRange gives a fast and effective mapping from h to the
  // appropriate range; it depends most, sometimes exclusively, on the upper
  // bits of h.
  inline Index GetStart(Hash h, Index num_starts) const {
    if (!TypesAndSettings::kUseSmash) {
      // For query speed, a small number of initial and final entries are
      // allowed to be under-utilized.
      // NOTE: This call statically enforces that Hash is equivalent to
      // either uint32_t or uint64_t.
      return FastRangeGeneric(h, num_starts);
    }

    // Extra logic to "smash" entries at the beginning and end, for better
    // utilization. For example, without smash and with
    // kFirstCoeffAlwaysOne, there's about a 30% chance that the first slot
    // in the banding will be unused, and worse without
    // kFirstCoeffAlwaysOne. The ending slots are even less utilized without
    // smash.
    //
    // Since this only affects roughly kCoeffBits of the slots, it is
    // usually small enough to be ignorable (saving computation in this
    // function) when the number of slots is roughly 10k or larger.
    //
    // TODO: re-check these degrees of smash, esp with kFirstCoeffAlwaysOne
    //
    constexpr auto kFrontSmash = kCoeffBits / 2 - 1;
    constexpr auto kBackSmash = kCoeffBits / 2;

    // Map into a range widened by the smash amounts on both sides ...
    Index widened = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash);
    // ... fold everything that landed in the front margin onto start 0 ...
    Index folded = std::max(widened, kFrontSmash);
    folded -= kFrontSmash;
    // ... and everything beyond the last valid start onto the last start.
    return std::min(folded, num_starts - 1);
  }

  // Expands the hash into a (non-zero) coefficient row.
  //
  // This is a reasonably cheap but empirically effective remix/expansion
  // of the hash data to fill CoeffRow (multiplication by large primes).
  // It is not so much "critical path" code because it can be done in
  // parallel (instruction level) with the memory lookup.
  inline CoeffRow GetCoeffRow(Hash h) const {
    Unsigned128 product_a = Multiply64to128(h, 0x85EBCA77C2B2AE63U);
    Unsigned128 product_b = Multiply64to128(h, 0x27D4EB2F165667C5U);
    auto coeffs = static_cast<CoeffRow>(product_b ^ (product_a << 64) ^
                                        (product_a >> 64));
    if (!kFirstCoeffAlwaysOne) {
      // Still have to ensure non-zero: set the lowest bit only when the
      // whole row came out as zero.
      coeffs |= static_cast<unsigned>(coeffs == 0);
    } else {
      coeffs |= 1;
    }
    return coeffs;
  }

  // Mask of the result-row bits in use.
  // TODO: will be used with InterleavedSolutionStorage
  // For now, all bits set (note: ResultRow might be a small type, so the
  // complement might need narrowing after integer promotion).
  inline ResultRow GetResultRowMask() const {
    return static_cast<ResultRow>(~ResultRow{0});
  }

  // Result-row bits derived from the hash: non-trivial for filters, and
  // always zero for a general PHSF.
  inline ResultRow GetResultRowFromHash(Hash h) const {
    if (!TypesAndSettings::kIsFilter) {
      // Must be zero
      return 0;
    }
    // In contrast to GetStart, here we draw primarily from lower bits,
    // but not literally, which seemed to cause an FP rate hit in some cases.
    // This is not so much "critical path" code because it can be done in
    // parallel (instruction level) with the memory lookup.
    auto mixed = static_cast<ResultRow>(h ^ (h >> 13) ^ (h >> 26));
    return mixed & GetResultRowMask();
  }

  // Result-row bits supplied by the input when AddInput == Key
  // (kIsFilter == true): there are none, so this must be zero.
  inline ResultRow GetResultRowFromInput(const Key&) const { return 0; }

  // Result-row bits supplied by the input when
  // AddInput == pair<Key, ResultRow> (kIsFilter == false): simply the
  // second member of the pair.
  inline ResultRow GetResultRowFromInput(
      const std::pair<Key, ResultRow>& bi) const {
    return bi.second;
  }

  // Advances to the next seed and returns true, unless the current seed has
  // already reached `max_seed`, in which case the seed is left unchanged
  // and false is returned.
  bool NextSeed(Seed max_seed) {
    const bool can_advance = !(seed_ >= max_seed);
    if (can_advance) {
      ++seed_;
    }
    return can_advance;
  }

  // Seed currently in use.
  Seed GetSeed() const { return seed_; }

  // Switches to `seed` (0 unless given).
  void ResetSeed(Seed seed = 0) { seed_ = seed; }

 protected:
  Seed seed_;
};
// StandardRehasher (and StandardRehasherAdapter): A variant of
// StandardHasher that uses the same type for keys as for hashes.
// This is primarily intended for building a Ribbon filter/PHSF
// from existing hashes without going back to original inputs in order
// to apply a different seed. This hasher seeds a 1-to-1 mixing
// transformation to apply a seed to an existing hash (or hash-sized key).
//
// Testing suggests essentially no degradation of solution success rate
// vs. going back to original inputs when changing hash seeds. For example:
// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys
// is about 1.10 for both StandardHasher and StandardRehasher.
//
// concept RehasherTypesAndSettings: like TypesAndSettings but
// does not require Key or HashFn.
template <class RehasherTypesAndSettings>
class StandardRehasherAdapter : public RehasherTypesAndSettings {
 public:
  using Hash = typename RehasherTypesAndSettings::Hash;
  // Keys are existing hashes, so the key type is the hash type.
  using Key = Hash;
  using Seed = typename RehasherTypesAndSettings::Seed;

  // Applies `seed` to an existing hash value `input` and returns the
  // re-mixed hash. The 32-bit mixing sequence is used when Hash is at most
  // 4 bytes wide, the 64-bit one otherwise; Hash wider than 8 bytes is
  // rejected at compile time.
  static Hash HashFn(const Hash& input, Seed seed) {
    static_assert(sizeof(Hash) <= 8, "Hash too big");
    if (sizeof(Hash) <= 4) {
      // XXH32_avalanche (32-bit), modified for seed
      uint32_t v32 = static_cast<uint32_t>(input);
      v32 ^= v32 >> 15;
      v32 ^= seed * uint32_t{0x27D4EB4F};
      v32 *= uint32_t{0x85EBCA77};
      v32 ^= v32 >> 13;
      v32 *= uint32_t{0xC2B2AE3D};
      v32 ^= v32 >> 16;
      return static_cast<Hash>(v32);
    }
    // XXH3_avalanche / XXH3p_avalanche (64-bit), modified for seed
    uint64_t v64 = input;
    v64 ^= v64 >> 37;
    v64 ^= seed * uint64_t{0xC2B2AE3D27D4EB4F};
    v64 *= uint64_t{0x165667B19E3779F9};
    v64 ^= v64 >> 32;
    return static_cast<Hash>(v64);
  }
};
// See comment on StandardRehasherAdapter.
// StandardRehasher is StandardHasher instantiated over that adapter, so its
// Key type is the Hash type and its HashFn is the adapter's seeded
// hash-to-hash mixing function.
template <class RehasherTypesAndSettings>
using StandardRehasher =
StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;
// StandardBanding: a canonical implementation of BandingStorage and
// BacktrackStorage, with convenience API for banding (solving with on-the-fly
// Gaussian elimination) with and without backtracking.
//
// The object is also its own hasher (it derives from StandardHasher), which
// is why the AddRange helpers pass `this` / `*this` as the storage,
// backtrack storage and hasher arguments of BandingAddRange.
template <class TypesAndSettings>
class StandardBanding : public StandardHasher<TypesAndSettings> {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  // With num_slots > 0, equivalent to default construction followed by
  // Reset(num_slots, backtrack_size). With num_slots == 0 no banding rows
  // are allocated; only the backtrack buffer is sized.
  StandardBanding(Index num_slots = 0, Index backtrack_size = 0) {
    if (num_slots > 0) {
      Reset(num_slots, backtrack_size);
    } else {
      EnsureBacktrackSize(backtrack_size);
    }
  }

  // Clears the banding and sizes it for `num_slots` slots (which must be at
  // least kCoeffBits), reusing the existing allocation when it is large
  // enough. Also makes sure the backtrack buffer holds at least
  // `backtrack_size` entries. The hash seed is not touched.
  void Reset(Index num_slots, Index backtrack_size = 0) {
    assert(num_slots >= kCoeffBits);
    if (num_slots > num_slots_allocated_) {
      // Value-initialized (zeroed) fresh arrays.
      coeff_rows_.reset(new CoeffRow[num_slots]());
      // Note: don't strictly have to zero-init result_rows,
      // except possible information leakage ;)
      result_rows_.reset(new ResultRow[num_slots]());
      num_slots_allocated_ = num_slots;
    } else {
      for (Index i = 0; i < num_slots; ++i) {
        coeff_rows_[i] = 0;
        // Note: don't strictly have to zero-init result_rows
        result_rows_[i] = 0;
      }
    }
    // A coefficient row spans kCoeffBits slots, so the last valid start is
    // kCoeffBits - 1 slots before the end.
    num_starts_ = num_slots - kCoeffBits + 1;
    EnsureBacktrackSize(backtrack_size);
  }

  // Grows the backtrack buffer to `backtrack_size` entries if it is
  // currently smaller; never shrinks it. Previous contents are discarded
  // when it grows.
  void EnsureBacktrackSize(Index backtrack_size) {
    if (backtrack_size > backtrack_size_) {
      backtrack_.reset(new Index[backtrack_size]);
      backtrack_size_ = backtrack_size;
    }
  }

  // ********************************************************************
  // From concept BandingStorage

  inline bool UsePrefetch() const {
    // A rough guestimate of when prefetching during construction pays off.
    // TODO: verify/validate
    return num_starts_ > 1500;
  }
  // Requests both rows of slot `i` ahead of a following add.
  inline void Prefetch(Index i) const {
    PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */);
    PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */);
  }
  // Direct, unchecked access to the rows of slot `i`.
  inline CoeffRow* CoeffRowPtr(Index i) { return &coeff_rows_[i]; }
  inline ResultRow* ResultRowPtr(Index i) { return &result_rows_[i]; }
  inline Index GetNumStarts() const { return num_starts_; }

  // from concept BacktrackStorage, for when backtracking is used
  inline bool UseBacktrack() const { return true; }
  // Unchecked: `i` must be below the size given to EnsureBacktrackSize/Reset.
  inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; }
  inline Index BacktrackGet(Index i) const { return backtrack_[i]; }

  // ********************************************************************
  // Some useful API, still somewhat low level. Here an input is
  // a Key for filters, or std::pair<Key, ResultRow> for general PHSF.

  // Adds a range of inputs to the banding, returning true if successful.
  // False means none or some may have been successfully added, so it's
  // best to Reset this banding before any further use.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  template <typename InputIterator>
  bool AddRange(InputIterator begin, InputIterator end) {
    return BandingAddRange(this, *this, begin, end);
  }

  // Adds a range of inputs to the banding, returning true if successful,
  // or if unsuccessful, rolls back to state before this call and returns
  // false. Caller guarantees that the number of inputs in this batch
  // does not exceed `backtrack_size` provided to Reset.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  template <typename InputIterator>
  bool AddRangeOrRollBack(InputIterator begin, InputIterator end) {
    return BandingAddRange(this, this, *this, begin, end);
  }

  // Adds a single input to the banding, returning true if successful.
  // If unsuccessful, returns false and banding state is unchanged.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  bool Add(const AddInput& input) { return AddRange(&input, &input + 1); }

  // Return the number of "occupied" rows (with non-zero coefficients stored).
  // Returns 0 for a banding that has never been Reset.
  Index GetOccupiedCount() const {
    if (num_starts_ == 0) {
      // Never Reset: no rows are allocated, and the slot count computed
      // below would be kCoeffBits - 1 rather than 0, so don't scan.
      return 0;
    }
    Index count = 0;
    const Index num_slots = num_starts_ + kCoeffBits - 1;
    for (Index i = 0; i < num_slots; ++i) {
      if (coeff_rows_[i] != 0) {
        ++count;
      }
    }
    return count;
  }

  // ********************************************************************
  // High-level API

  // Iteratively (a) resets the structure for `num_slots`, (b) attempts
  // to add the range of inputs, and (c) if unsuccessful, chooses next
  // hash seed, until either successful or unsuccessful with max_seed
  // (minimum one seed attempted). Returns true if successful. In that
  // case, use GetSeed() to get the successful seed.
  //
  // The search always starts from seed 0 (the seed is reset first).
  //
  // If unsuccessful, how best to continue is going to be application
  // specific. It should be possible to choose parameters such that
  // failure is extremely unlikely, using max_seed around 32 to 64.
  // (TODO: APIs to help choose parameters) One option for fallback in
  // constructing a filter is to construct a Bloom filter instead.
  // Increasing num_slots is an option, but should not be used often
  // unless construction maximum latency is a concern (rather than
  // average running time of construction). Instead, choose parameters
  // appropriately and trust that seeds are independent. (Also,
  // increasing num_slots without changing hash seed would have a
  // significant correlation in success, rather than independence.)
  template <typename InputIterator>
  bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin,
                               InputIterator end, Seed max_seed) {
    StandardHasher<TypesAndSettings>::ResetSeed();
    do {
      Reset(num_slots);
      bool success = AddRange(begin, end);
      if (success) {
        return true;
      }
    } while (StandardHasher<TypesAndSettings>::NextSeed(max_seed));
    // No seed through max_seed worked.
    return false;
  }

 protected:
  // TODO: explore combining in a struct
  std::unique_ptr<CoeffRow[]> coeff_rows_;
  std::unique_ptr<ResultRow[]> result_rows_;
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  // Capacity (in slots) of coeff_rows_ / result_rows_.
  Index num_slots_allocated_ = 0;
  std::unique_ptr<Index[]> backtrack_;
  // Capacity (in entries) of backtrack_.
  Index backtrack_size_ = 0;
};
// Implements concept SimpleSolutionStorage, mostly for demonstration
// purposes. This is "in memory" only because it does not handle byte
// ordering issues for serialization.
template <class TypesAndSettings>
class InMemSimpleSolution {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  // Sizes the storage for a solution with `num_starts` starts, i.e.
  // num_starts + kCoeffBits - 1 slots, reusing the current allocation when
  // it is large enough. Row contents are unspecified afterwards; they are
  // expected to be filled in with Store().
  void PrepareForNumStarts(Index num_starts) {
    const Index num_slots = num_starts + kCoeffBits - 1;
    assert(num_slots >= kCoeffBits);
    if (num_slots > num_slots_allocated_) {
      // Do not need to init the memory
      solution_rows_.reset(new ResultRow[num_slots]);
      num_slots_allocated_ = num_slots;
    }
    num_starts_ = num_starts;
  }

  Index GetNumStarts() const { return num_starts_; }

  // Unchecked read / write of the solution row at `slot_num`.
  ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; }
  void Store(Index slot_num, ResultRow solution_row) {
    solution_rows_[slot_num] = solution_row;
  }

  // ********************************************************************
  // High-level API

  // Fills this solution by back-substitution from a completed banding.
  template <typename BandingStorage>
  void BackSubstFrom(const BandingStorage& ss) {
    SimpleBackSubst(this, ss);
  }

  // Queries the result row for `input` in a general PHSF
  // (kIsFilter == false). Does not modify the solution.
  template <typename PhsfQueryHasher>
  ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const {
    assert(!TypesAndSettings::kIsFilter);
    return SimplePhsfQuery(input, hasher, *this);
  }

  // Queries a filter (kIsFilter == true) for `input`; returns the result of
  // SimpleFilterQuery. Does not modify the solution.
  template <typename FilterQueryHasher>
  bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const {
    assert(TypesAndSettings::kIsFilter);
    return SimpleFilterQuery(input, hasher, *this);
  }

 protected:
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  // Capacity (in slots) of solution_rows_.
  Index num_slots_allocated_ = 0;
  std::unique_ptr<ResultRow[]> solution_rows_;
};
} // namespace ribbon
} // namespace ROCKSDB_NAMESPACE
// For convenience working with templates.
// Expands, in the scope where it is invoked, to the aliases Hasher, Banding
// and SimpleSoln for StandardHasher, StandardBanding and InMemSimpleSolution
// instantiated over `TypesAndSettings`. The trailing static_assert mentions
// all three aliases and leaves the expansion without its final semicolon so
// that one must be written after the macro call.
#define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings) \
using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<TypesAndSettings>; \
using Banding = \
ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>; \
using SimpleSoln = \
ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>; \
static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) > 0, \
"avoid unused warnings, semicolon expected after macro call")

408
util/ribbon_test.cc Normal file
View File

@ -0,0 +1,408 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <cmath>
#include "test_util/testharness.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/ribbon_impl.h"
// FLAGS_thoroughness: number of iterations per test configuration.
// Without gflags it is a plain global fixed at 5; with gflags it is a
// command-line flag with the same default.
#ifndef GFLAGS
uint32_t FLAGS_thoroughness = 5;
#else
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// Using 500 is a good test when you have time to be thorough.
// Default is for general RocksDB regression test runs.
DEFINE_uint32(thoroughness, 5, "iterations per configuration");
#endif // GFLAGS
// Fixture for typed tests, instantiated once per TypesAndSettings
// configuration. Holds no state of its own.
template <typename TypesAndSettings>
class RibbonTypeParamTest : public ::testing::Test {};
// Fixture for tests that are not parameterized on a configuration.
class RibbonTest : public ::testing::Test {};
// Baseline TypesAndSettings configuration that the other test
// configurations derive from and selectively override: a filter
// (kIsFilter) with 128-bit coefficient rows, 8-bit result rows, 32-bit
// indexes and seeds, 64-bit hashes and Slice keys.
struct DefaultTypesAndSettings {
using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128;
using ResultRow = uint8_t;
using Index = uint32_t;
using Hash = uint64_t;
using Key = ROCKSDB_NAMESPACE::Slice;
using Seed = uint32_t;
static constexpr bool kIsFilter = true;
static constexpr bool kFirstCoeffAlwaysOne = true;
static constexpr bool kUseSmash = false;
// Seeded 64-bit hash over the key's bytes.
static Hash HashFn(const Key& key, Seed seed) {
return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed);
}
};
// Test configurations. Each one differs from DefaultTypesAndSettings (or
// from the configuration it derives from) only in the members it redeclares.

// The default configuration already uses 128-bit coefficient rows.
using TypesAndSettings_Coeff128 = DefaultTypesAndSettings;
// 128-bit coefficient rows with "smash" enabled.
struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings {
static constexpr bool kUseSmash = true;
};
// 64-bit coefficient rows.
struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings {
using CoeffRow = uint64_t;
};
// 64-bit coefficient rows with "smash" enabled.
struct TypesAndSettings_Coeff64Smash : public DefaultTypesAndSettings {
using CoeffRow = uint64_t;
static constexpr bool kUseSmash = true;
};
// 16-bit result rows.
struct TypesAndSettings_Result16 : public DefaultTypesAndSettings {
using ResultRow = uint16_t;
};
// size_t as the index type.
struct TypesAndSettings_IndexSizeT : public DefaultTypesAndSettings {
using Index = size_t;
};
// 32-bit hashes, taken as the upper half of the seeded 64-bit hash.
struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings {
using Hash = uint32_t;
static Hash HashFn(const Key& key, Seed seed) {
// NOTE: Using RocksDB 32-bit Hash() here fails test below because of
// insufficient mixing of seed (or generally insufficient mixing)
return ROCKSDB_NAMESPACE::Upper32of64(
ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed));
}
};
// 32-bit hashes combined with 16-bit result rows.
struct TypesAndSettings_Hash32_Result16 : public TypesAndSettings_Hash32 {
using ResultRow = uint16_t;
};
// std::string keys instead of Slice.
struct TypesAndSettings_KeyString : public DefaultTypesAndSettings {
using Key = std::string;
};
// 8-bit seeds.
struct TypesAndSettings_Seed8 : public DefaultTypesAndSettings {
using Seed = uint8_t;
};
// First coefficient not forced to one.
struct TypesAndSettings_NoAlwaysOne : public DefaultTypesAndSettings {
static constexpr bool kFirstCoeffAlwaysOne = false;
};
// 64-bit configuration whose HashFn hashes the key with seed 0 and then
// applies the requested seed through StandardRehasherAdapter::HashFn.
struct TypesAndSettings_RehasherWrapped : public DefaultTypesAndSettings {
// This doesn't directly use StandardRehasher as a whole, but simulates
// its behavior with unseeded hash of key, then seeded hash-to-hash
// transform.
static Hash HashFn(const Key& key, Seed seed) {
Hash unseeded = DefaultTypesAndSettings::HashFn(key, /*seed*/ 0);
using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter<
DefaultTypesAndSettings>;
return Rehasher::HashFn(unseeded, seed);
}
};
// Same construction as above, on top of the 32-bit hash configuration.
struct TypesAndSettings_Rehasher32Wrapped : public TypesAndSettings_Hash32 {
// This doesn't directly use StandardRehasher as a whole, but simulates
// its behavior with unseeded hash of key, then seeded hash-to-hash
// transform.
static Hash HashFn(const Key& key, Seed seed) {
Hash unseeded = TypesAndSettings_Hash32::HashFn(key, /*seed*/ 0);
using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter<
TypesAndSettings_Hash32>;
return Rehasher::HashFn(unseeded, seed);
}
};
// The list of configurations that every RibbonTypeParamTest typed test is
// instantiated with.
using TestTypesAndSettings =
::testing::Types<TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash,
TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash,
TypesAndSettings_Result16, TypesAndSettings_IndexSizeT,
TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16,
TypesAndSettings_KeyString, TypesAndSettings_Seed8,
TypesAndSettings_NoAlwaysOne,
TypesAndSettings_RehasherWrapped,
TypesAndSettings_Rehasher32Wrapped>;
TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings);
namespace {
// Minimal input-iterator-like generator of distinct test keys. Every key is
// `prefix` followed by 8 bytes that are rewritten from the current id each
// time the generator is dereferenced. Two generators are considered equal
// when their ids are equal; they are assumed to share the same prefix.
struct KeyGen {
  // NOTE(review): PutFixed64 presumably appends an 8-byte encoding of the
  // id, which is the tail that operator* later overwrites -- confirm in
  // util/coding.h.
  KeyGen(const std::string& prefix, uint64_t id) : id_(id), str_(prefix) {
    ROCKSDB_NAMESPACE::PutFixed64(&str_, id_);
  }

  // Prefix (only one required)
  KeyGen& operator++() {
    ++id_;
    return *this;
  }

  KeyGen& operator+=(uint64_t incr) {
    id_ += incr;
    return *this;
  }

  // Rewrites the last 8 bytes of the key from the current id and returns
  // the key. The returned reference is to internal storage, so its contents
  // change on the next dereference after the id has moved.
  const std::string& operator*() {
    // Use multiplication to mix things up a little in the key
    ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8],
                                     id_ * uint64_t{0x1500000001});
    return str_;
  }

  // The comparisons are const so that const generators can be compared and
  // so that C++20 reversed-operand candidates are not ambiguous.
  bool operator==(const KeyGen& other) const {
    // Same prefix is assumed
    return id_ == other.id_;
  }
  bool operator!=(const KeyGen& other) const {
    // Same prefix is assumed
    return id_ != other.id_;
  }

  uint64_t id_;
  std::string str_;
};
// Bounds for testing Poisson-distributed (or similar) statistics.
// (Poisson approximates Binomial only if the probability of a trial being
// in the count is low.)

// Count that lies `stddevs_allowed` standard deviations above the expected
// mean `expected_count`, plus one, truncated to an integer.
uint64_t PoissonUpperBound(double expected_count, double stddevs_allowed) {
  const double upper =
      expected_count + stddevs_allowed * std::sqrt(expected_count) + 1.0;
  return static_cast<uint64_t>(upper);
}

// Count that lies `stddevs_allowed` standard deviations below the expected
// mean `expected_count`, truncated to an integer and never below zero.
uint64_t PoissonLowerBound(double expected_count, double stddevs_allowed) {
  const double lower =
      expected_count - stddevs_allowed * std::sqrt(expected_count);
  if (lower > 0.0) {
    return static_cast<uint64_t>(lower);
  }
  return 0;
}

// Bounds for frequently checked statistics: allow up to 5.0 standard
// deviations.
uint64_t FrequentPoissonUpperBound(double expected_count) {
  constexpr double kStddevs = 5.0;
  return PoissonUpperBound(expected_count, kStddevs);
}
uint64_t FrequentPoissonLowerBound(double expected_count) {
  constexpr double kStddevs = 5.0;
  return PoissonLowerBound(expected_count, kStddevs);
}

// Bounds for infrequently checked statistics: allow up to 3 standard
// deviations.
uint64_t InfrequentPoissonUpperBound(double expected_count) {
  constexpr double kStddevs = 3.0;
  return PoissonUpperBound(expected_count, kStddevs);
}
uint64_t InfrequentPoissonLowerBound(double expected_count) {
  constexpr double kStddevs = 3.0;
  return PoissonLowerBound(expected_count, kStddevs);
}
} // namespace
// End-to-end statistical test of one Ribbon configuration (TypeParam).
// Repeats FLAGS_thoroughness times: build a banding for a generated key set
// using only ~2% more slots than keys (kFactor), exercise roll-back and
// incremental adds, back-substitute into a solution, then verify that every
// added key passes FilterQuery and that the false-positive count on
// never-added keys is within Poisson bounds of the expectation.
// After the loop, the accumulated re-seed, single-add failure, batch success
// and false-positive totals are each checked against rough bounds.
TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
// NOTE(review): both macros are defined outside this chunk; presumably they
// bring aliases such as Index, Seed, Hash, CoeffRow, ResultRow, Hasher,
// Banding and SimpleSoln into scope -- confirm against their definitions.
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
IMPORT_RIBBON_IMPL_TYPES(TypeParam);
// For testing FP rate etc.
// kNumToCheck is the number of never-added keys queried per iteration.
constexpr Index kNumToCheck = 100000;
// One solution column per bit of ResultRow.
constexpr size_t kNumSolutionColumns = 8U * sizeof(ResultRow);
// Expected false positives per iteration if each non-added key matches with
// probability 2^-kNumSolutionColumns (before the hash-collision correction
// that is added further down).
const double expected_fp_count =
kNumToCheck * std::pow(0.5, kNumSolutionColumns);
// Seed-related limits below scale with floor(log2(FLAGS_thoroughness)).
const auto log2_thoroughness =
static_cast<Seed>(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness));
// FIXME: This upper bound seems excessive
const Seed max_seed = 12 + log2_thoroughness;
// With overhead of just 2%, expect ~50% encoding success per
// seed with ~5k keys on 64-bit ribbon, or ~150k keys on 128-bit ribbon.
const double kFactor = 1.02;
// Accumulators over all iterations; evaluated after the loop.
uint64_t total_reseeds = 0;
uint64_t total_single_failures = 0;
uint64_t total_batch_successes = 0;
uint64_t total_fp_count = 0;
uint64_t total_added = 0;
for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) {
// Base key count depends on the coefficient row width (16 bytes, i.e.
// 128-bit, vs. narrower) and on TypeParam::kUseSmash.
Index numToAdd =
sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5000 : 2500;
// Use different values between that number and 50% of that number
numToAdd -= (i * 15485863) % (numToAdd / 2);
total_added += numToAdd;
// Slot count is the key count scaled by the space overhead factor.
const Index kNumSlots = static_cast<Index>(numToAdd * kFactor);
std::string prefix;
// Take different samples if you change thoroughness
ROCKSDB_NAMESPACE::PutFixed32(&prefix,
i + (FLAGS_thoroughness * 123456789U));
// Batch that must be added
std::string added_str = prefix + "added";
KeyGen keys_begin(added_str, 0);
KeyGen keys_end(added_str, numToAdd);
// Batch that may or may not be added
const Index kBatchSize =
sizeof(CoeffRow) == 16 ? 300 : TypeParam::kUseSmash ? 20 : 10;
std::string batch_str = prefix + "batch";
KeyGen batch_begin(batch_str, 0);
KeyGen batch_end(batch_str, kBatchSize);
// Batch never (successfully) added, but used for querying FP rate
std::string not_str = prefix + "not";
KeyGen other_keys_begin(not_str, 0);
KeyGen other_keys_end(not_str, kNumToCheck);
// Results that must outlive the Banding object scoped below.
SimpleSoln soln;
Hasher hasher;
// Outcomes of the optional adds; assigned inside the banding scope and used
// afterwards to decide which extra keys must pass FilterQuery.
bool first_single;
bool second_single;
bool batch_success;
{
Banding banding;
// Traditional solve for a fixed set.
ASSERT_TRUE(banding.ResetAndFindSeedToSolve(kNumSlots, keys_begin,
keys_end, max_seed));
// Now to test backtracking, starting with guaranteed fail
// Adding all kNumToCheck "not" keys is asserted to fail, and the occupied
// count must be exactly what it was before the attempt.
Index occupied_count = banding.GetOccupiedCount();
banding.EnsureBacktrackSize(kNumToCheck);
ASSERT_FALSE(
banding.AddRangeOrRollBack(other_keys_begin, other_keys_end));
ASSERT_EQ(occupied_count, banding.GetOccupiedCount());
// Check that we still have a good chance of adding a couple more
// individually
first_single = banding.Add("one_more");
second_single = banding.Add("two_more");
Index more_added = (first_single ? 1 : 0) + (second_single ? 1 : 0);
// Two single adds are attempted per iteration; count those that failed.
total_single_failures += 2U - more_added;
// Or as a batch
batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end);
if (batch_success) {
more_added += kBatchSize;
++total_batch_successes;
}
// The occupied count may grow by at most the number of keys that were
// successfully added after the initial solve.
ASSERT_LE(banding.GetOccupiedCount(), occupied_count + more_added);
// Now back-substitution
soln.BackSubstFrom(banding);
Seed seed = banding.GetSeed();
// NOTE(review): the final seed value is used as the re-seed count, which
// presumably relies on seeds being tried in order starting from 0 -- confirm.
total_reseeds += seed;
if (seed > log2_thoroughness + 1) {
fprintf(stderr, "%s high reseeds at %u, %u: %u\n",
seed > log2_thoroughness + 8 ? "FIXME Extremely" : "Somewhat",
static_cast<unsigned>(i), static_cast<unsigned>(numToAdd),
static_cast<unsigned>(seed));
}
// Queries below must hash with the same seed the banding ended up with.
hasher.ResetSeed(seed);
}
// soln and hasher now independent of Banding object
// Verify keys added
// (no false negatives are tolerated for keys from the mandatory set)
KeyGen cur = keys_begin;
while (cur != keys_end) {
EXPECT_TRUE(soln.FilterQuery(*cur, hasher));
++cur;
}
// We (maybe) snuck these in!
if (first_single) {
EXPECT_TRUE(soln.FilterQuery("one_more", hasher));
}
if (second_single) {
EXPECT_TRUE(soln.FilterQuery("two_more", hasher));
}
if (batch_success) {
cur = batch_begin;
while (cur != batch_end) {
EXPECT_TRUE(soln.FilterQuery(*cur, hasher));
++cur;
}
}
// Check FP rate (depends only on number of result bits == solution columns)
Index fp_count = 0;
cur = other_keys_begin;
while (cur != other_keys_end) {
fp_count += soln.FilterQuery(*cur, hasher) ? 1 : 0;
++cur;
}
// For expected FP rate, also include false positives due to collisions
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
// 256^sizeof(Hash) is the number of distinct Hash values.
double correction =
1.0 * kNumToCheck * numToAdd / std::pow(256.0, sizeof(Hash));
EXPECT_LE(fp_count,
FrequentPoissonUpperBound(expected_fp_count + correction));
EXPECT_GE(fp_count,
FrequentPoissonLowerBound(expected_fp_count + correction));
total_fp_count += fp_count;
}
// Aggregate check 1: total number of re-seeds over all iterations.
{
double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness;
fprintf(stderr, "Average re-seeds: %g\n", average_reseeds);
// Values above were chosen to target around 50% chance of encoding success
// rate (average of 1.0 re-seeds) or slightly better. But 1.1 is also close
// enough.
EXPECT_LE(total_reseeds,
InfrequentPoissonUpperBound(1.1 * FLAGS_thoroughness));
EXPECT_GE(total_reseeds,
InfrequentPoissonLowerBound(0.9 * FLAGS_thoroughness));
}
// Aggregate check 2: failures of the two extra single-key adds.
{
// Two single adds were attempted in every iteration.
uint64_t total_singles = 2 * FLAGS_thoroughness;
double single_failure_rate = 1.0 * total_single_failures / total_singles;
fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate);
// A rough bound (one sided) based on nothing in particular
double expected_single_failures =
1.0 * total_singles /
(sizeof(CoeffRow) == 16 ? 128 : TypeParam::kUseSmash ? 64 : 32);
EXPECT_LE(total_single_failures,
InfrequentPoissonUpperBound(expected_single_failures));
}
// Aggregate check 3: successes of the optional batch add.
{
// Counting successes here for Poisson to approximate the Binomial
// distribution.
// A rough bound (one sided) based on nothing in particular.
// The expectation used is a batch success in half of the iterations.
double expected_batch_successes = 1.0 * FLAGS_thoroughness / 2;
uint64_t lower_bound =
InfrequentPoissonLowerBound(expected_batch_successes);
fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n",
1.0 * total_batch_successes / FLAGS_thoroughness,
1.0 * lower_bound / FLAGS_thoroughness);
EXPECT_GE(total_batch_successes, lower_bound);
}
// Aggregate check 4: overall false-positive count, bounded on both sides.
{
uint64_t total_checked = uint64_t{kNumToCheck} * FLAGS_thoroughness;
double expected_total_fp_count =
total_checked * std::pow(0.5, kNumSolutionColumns);
// For expected FP rate, also include false positives due to collisions
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
// total_added / FLAGS_thoroughness is the average number of keys added per
// iteration.
expected_total_fp_count += 1.0 * total_checked * total_added /
FLAGS_thoroughness /
std::pow(256.0, sizeof(Hash));
uint64_t upper_bound = InfrequentPoissonUpperBound(expected_total_fp_count);
uint64_t lower_bound = InfrequentPoissonLowerBound(expected_total_fp_count);
fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n",
1.0 * total_fp_count / total_checked,
expected_total_fp_count / total_checked,
1.0 * upper_bound / total_checked,
1.0 * lower_bound / total_checked);
// FIXME: this can fail for Result16, e.g. --thoroughness=100
// Seems due to inexpensive hashing in StandardHasher::GetCoeffRow and
// GetResultRowFromHash as replacing those with different Hash64 instances
// fixes it, at least mostly.
EXPECT_LE(total_fp_count, upper_bound);
EXPECT_GE(total_fp_count, lower_bound);
}
}
// Placeholder for an additional test case. It currently only invokes the two
// type-import macros for DefaultTypesAndSettings and performs no checks.
TEST(RibbonTest, Another) {
IMPORT_RIBBON_TYPES_AND_SETTINGS(DefaultTypesAndSettings);
IMPORT_RIBBON_IMPL_TYPES(DefaultTypesAndSettings);
// TODO: add the actual test logic.
}
// Entry point of the test binary: initializes GoogleTest from the command
// line, parses the remaining flags with gflags when built with GFLAGS, then
// runs every registered test and returns the result as the exit status.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
#ifdef GFLAGS
  ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
#endif  // GFLAGS
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}