Allow to use a string as a delimiter in StringAppendOperator (#8536)

Summary:
An arbitrary string can be used as a delimiter in StringAppend merge operator
flavor. In particular, it allows using an empty string, combining binary values for
the same key byte-to-byte one next to another.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8536

Reviewed By: mrambacher

Differential Revision: D29962120

Pulled By: zhichao-cao

fbshipit-source-id: 4ef5d846a47835cf428a11200409e30e2dbffc4f
This commit is contained in:
Mikhail Golubev 2021-08-02 16:49:54 -07:00 committed by Facebook GitHub Bot
parent ab7f7c9e49
commit 8f52972cf9
10 changed files with 149 additions and 36 deletions

View File

@ -9,6 +9,9 @@
### Performance Improvements
* Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value.
### Behavior Changes
* `StringAppendOperator` additionally accepts a string as the delimiter.
## 6.23.0 (2021-07-16)
### Behavior Changes
* Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present.

View File

@ -30,7 +30,7 @@
* Method: newSharedStringAppendOperator
* Signature: (C)J
*/
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator(
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__C(
JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) {
auto* sptr_string_append_op =
new std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>(
@ -39,6 +39,20 @@ jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator(
return reinterpret_cast<jlong>(sptr_string_append_op);
}
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2(
JNIEnv* env, jclass /*jclass*/, jstring jdelim) {
jboolean has_exception = JNI_FALSE;
auto delim =
ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdelim, &has_exception);
if (has_exception == JNI_TRUE) {
return 0;
}
auto* sptr_string_append_op =
new std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>(
ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator(delim));
return reinterpret_cast<jlong>(sptr_string_append_op);
}
/*
* Class: org_rocksdb_StringAppendOperator
* Method: disposeInternal

View File

@ -19,6 +19,11 @@ public class StringAppendOperator extends MergeOperator {
super(newSharedStringAppendOperator(delim));
}
public StringAppendOperator(String delim) {
super(newSharedStringAppendOperator(delim));
}
private native static long newSharedStringAppendOperator(final char delim);
private native static long newSharedStringAppendOperator(final String delim);
@Override protected final native void disposeInternal(final long handle);
}

View File

@ -412,6 +412,32 @@ public class MergeTest {
}
}
@Test
public void emptyStringAsStringAppendDelimiter() throws RocksDBException {
try (final StringAppendOperator stringAppendOperator = new StringAppendOperator("");
final Options opt =
new Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
db.put("key".getBytes(), "aa".getBytes());
db.merge("key".getBytes(), "bb".getBytes());
final byte[] value = db.get("key".getBytes());
assertThat(new String(value)).isEqualTo("aabb");
}
}
@Test
public void multiCharStringAsStringAppendDelimiter() throws RocksDBException {
try (final StringAppendOperator stringAppendOperator = new StringAppendOperator("<>");
final Options opt =
new Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
db.put("key".getBytes(), "aa".getBytes());
db.merge("key".getBytes(), "bb".getBytes());
final byte[] value = db.get("key".getBytes());
assertThat(new String(value)).isEqualTo("aa<>bb");
}
}
@Test
public void emptyStringInSetMergeOperatorByName() {
try (final Options opt = new Options()

View File

@ -20,6 +20,8 @@ class MergeOperators {
static std::shared_ptr<MergeOperator> CreateUInt64AddOperator();
static std::shared_ptr<MergeOperator> CreateStringAppendOperator();
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(char delim_char);
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(
const std::string& delim);
static std::shared_ptr<MergeOperator> CreateStringAppendTESTOperator();
static std::shared_ptr<MergeOperator> CreateMaxOperator();
static std::shared_ptr<MergeOperator> CreateBytesXOROperator();

View File

@ -17,8 +17,10 @@ namespace ROCKSDB_NAMESPACE {
// Constructor: also specify the delimiter character.
StringAppendOperator::StringAppendOperator(char delim_char)
: delim_(delim_char) {
}
: delim_(1, delim_char) {}
StringAppendOperator::StringAppendOperator(const std::string& delim)
: delim_(delim) {}
// Implementation for the merge operation (concatenates two strings)
bool StringAppendOperator::Merge(const Slice& /*key*/,
@ -35,9 +37,9 @@ bool StringAppendOperator::Merge(const Slice& /*key*/,
} else {
// Generic append (existing_value != null).
// Reserve *new_value to correct size, and apply concatenation.
new_value->reserve(existing_value->size() + 1 + value.size());
new_value->reserve(existing_value->size() + delim_.size() + value.size());
new_value->assign(existing_value->data(), existing_value->size());
new_value->append(1,delim_);
new_value->append(delim_);
new_value->append(value.data(), value.size());
}
@ -56,4 +58,9 @@ std::shared_ptr<MergeOperator> MergeOperators::CreateStringAppendOperator(char d
return std::make_shared<StringAppendOperator>(delim_char);
}
std::shared_ptr<MergeOperator> MergeOperators::CreateStringAppendOperator(
const std::string& delim) {
return std::make_shared<StringAppendOperator>(delim);
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -14,6 +14,7 @@ class StringAppendOperator : public AssociativeMergeOperator {
public:
// Constructor: specify delimiter
explicit StringAppendOperator(char delim_char);
explicit StringAppendOperator(const std::string& delim);
virtual bool Merge(const Slice& key,
const Slice* existing_value,
@ -24,8 +25,7 @@ class StringAppendOperator : public AssociativeMergeOperator {
virtual const char* Name() const override;
private:
char delim_; // The delimiter is inserted between elements
std::string delim_; // The delimiter is inserted between elements
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -17,8 +17,10 @@ namespace ROCKSDB_NAMESPACE {
// Constructor: also specify the delimiter character.
StringAppendTESTOperator::StringAppendTESTOperator(char delim_char)
: delim_(delim_char) {
}
: delim_(1, delim_char) {}
StringAppendTESTOperator::StringAppendTESTOperator(const std::string& delim)
: delim_(delim) {}
// Implementation for the merge operation (concatenates two strings)
bool StringAppendTESTOperator::FullMergeV2(
@ -37,7 +39,7 @@ bool StringAppendTESTOperator::FullMergeV2(
size_t numBytes = 0;
for (auto it = merge_in.operand_list.begin();
it != merge_in.operand_list.end(); ++it) {
numBytes += it->size() + 1; // Plus 1 for the delimiter
numBytes += it->size() + delim_.size();
}
// Only print the delimiter after the first entry has been printed
@ -50,15 +52,16 @@ bool StringAppendTESTOperator::FullMergeV2(
merge_in.existing_value->size());
printDelim = true;
} else if (numBytes) {
merge_out->new_value.reserve(
numBytes - 1); // Minus 1 since we have one less delimiter
// Without the existing (initial) value, the delimiter before the first of
// subsequent operands becomes redundant.
merge_out->new_value.reserve(numBytes - delim_.size());
}
// Concatenate the sequence of strings (and add a delimiter between each)
for (auto it = merge_in.operand_list.begin();
it != merge_in.operand_list.end(); ++it) {
if (printDelim) {
merge_out->new_value.append(1, delim_);
merge_out->new_value.append(delim_);
}
merge_out->new_value.append(it->data(), it->size());
printDelim = true;
@ -89,7 +92,7 @@ bool StringAppendTESTOperator::_AssocPartialMergeMulti(
for (const auto& operand : operand_list) {
size += operand.size();
}
size += operand_list.size() - 1; // Delimiters
size += (operand_list.size() - 1) * delim_.length(); // Delimiters
new_value->reserve(size);
// Apply concatenation
@ -97,7 +100,7 @@ bool StringAppendTESTOperator::_AssocPartialMergeMulti(
for (std::deque<Slice>::const_iterator it = operand_list.begin() + 1;
it != operand_list.end(); ++it) {
new_value->append(1, delim_);
new_value->append(delim_);
new_value->append(it->data(), it->size());
}

View File

@ -24,6 +24,7 @@ class StringAppendTESTOperator : public MergeOperator {
public:
// Constructor with delimiter
explicit StringAppendTESTOperator(char delim_char);
explicit StringAppendTESTOperator(const std::string& delim);
virtual bool FullMergeV2(const MergeOperationInput& merge_in,
MergeOperationOutput* merge_out) const override;
@ -42,8 +43,7 @@ class StringAppendTESTOperator : public MergeOperator {
const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const;
char delim_; // The delimiter is inserted between elements
std::string delim_; // The delimiter is inserted between elements
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -35,22 +35,34 @@ const std::string kDbName = test::PerThreadDBPath("stringappend_test");
namespace {
// OpenDb opens a (possibly new) rocksdb database with a StringAppendOperator
std::shared_ptr<DB> OpenNormalDb(char delim_char) {
std::shared_ptr<DB> OpenNormalDb(const std::string& delim) {
DB* db;
Options options;
options.create_if_missing = true;
options.merge_operator.reset(new StringAppendOperator(delim_char));
MergeOperator* mergeOperator;
if (delim.size() == 1) {
mergeOperator = new StringAppendOperator(delim[0]);
} else {
mergeOperator = new StringAppendOperator(delim);
}
options.merge_operator.reset(mergeOperator);
EXPECT_OK(DB::Open(options, kDbName, &db));
return std::shared_ptr<DB>(db);
}
#ifndef ROCKSDB_LITE // TtlDb is not supported in Lite
// Open a TtlDB with a non-associative StringAppendTESTOperator
std::shared_ptr<DB> OpenTtlDb(char delim_char) {
std::shared_ptr<DB> OpenTtlDb(const std::string& delim) {
DBWithTTL* db;
Options options;
options.create_if_missing = true;
options.merge_operator.reset(new StringAppendTESTOperator(delim_char));
MergeOperator* mergeOperator;
if (delim.size() == 1) {
mergeOperator = new StringAppendTESTOperator(delim[0]);
} else {
mergeOperator = new StringAppendTESTOperator(delim);
}
options.merge_operator.reset(mergeOperator);
EXPECT_OK(DBWithTTL::Open(options, kDbName, &db, 123456));
return std::shared_ptr<DB>(db);
}
@ -137,7 +149,7 @@ class StringAppendOperatorTest : public testing::Test,
StringAppendOperatorTest::SetOpenDbFunction(&OpenNormalDb);
}
typedef std::shared_ptr<DB> (* OpenFuncPtr)(char);
typedef std::shared_ptr<DB> (*OpenFuncPtr)(const std::string&);
// Allows user to open databases with different configurations.
// e.g.: Can open a DB or a TtlDB, etc.
@ -153,7 +165,7 @@ StringAppendOperatorTest::OpenFuncPtr StringAppendOperatorTest::OpenDb = nullptr
// THE TEST CASES BEGIN HERE
TEST_P(StringAppendOperatorTest, IteratorTest) {
auto db_ = OpenDb(',');
auto db_ = OpenDb(",");
StringLists slists(db_);
slists.Append("k1", "v1");
@ -246,7 +258,7 @@ TEST_P(StringAppendOperatorTest, IteratorTest) {
}
TEST_P(StringAppendOperatorTest, SimpleTest) {
auto db = OpenDb(',');
auto db = OpenDb(",");
StringLists slists(db);
slists.Append("k1", "v1");
@ -259,7 +271,7 @@ TEST_P(StringAppendOperatorTest, SimpleTest) {
}
TEST_P(StringAppendOperatorTest, SimpleDelimiterTest) {
auto db = OpenDb('|');
auto db = OpenDb("|");
StringLists slists(db);
slists.Append("k1", "v1");
@ -271,8 +283,49 @@ TEST_P(StringAppendOperatorTest, SimpleDelimiterTest) {
ASSERT_EQ(res, "v1|v2|v3");
}
TEST_P(StringAppendOperatorTest, EmptyDelimiterTest) {
auto db = OpenDb("");
StringLists slists(db);
slists.Append("k1", "v1");
slists.Append("k1", "v2");
slists.Append("k1", "v3");
std::string res;
ASSERT_TRUE(slists.Get("k1", &res));
ASSERT_EQ(res, "v1v2v3");
}
TEST_P(StringAppendOperatorTest, MultiCharDelimiterTest) {
auto db = OpenDb("<>");
StringLists slists(db);
slists.Append("k1", "v1");
slists.Append("k1", "v2");
slists.Append("k1", "v3");
std::string res;
ASSERT_TRUE(slists.Get("k1", &res));
ASSERT_EQ(res, "v1<>v2<>v3");
}
TEST_P(StringAppendOperatorTest, DelimiterIsDefensivelyCopiedTest) {
std::string delimiter = "<>";
auto db = OpenDb(delimiter);
StringLists slists(db);
slists.Append("k1", "v1");
slists.Append("k1", "v2");
delimiter.clear();
slists.Append("k1", "v3");
std::string res;
ASSERT_TRUE(slists.Get("k1", &res));
ASSERT_EQ(res, "v1<>v2<>v3");
}
TEST_P(StringAppendOperatorTest, OneValueNoDelimiterTest) {
auto db = OpenDb('!');
auto db = OpenDb("!");
StringLists slists(db);
slists.Append("random_key", "single_val");
@ -283,7 +336,7 @@ TEST_P(StringAppendOperatorTest, OneValueNoDelimiterTest) {
}
TEST_P(StringAppendOperatorTest, VariousKeys) {
auto db = OpenDb('\n');
auto db = OpenDb("\n");
StringLists slists(db);
slists.Append("c", "asdasd");
@ -309,7 +362,7 @@ TEST_P(StringAppendOperatorTest, VariousKeys) {
// Generate semi random keys/words from a small distribution.
TEST_P(StringAppendOperatorTest, RandomMixGetAppend) {
auto db = OpenDb(' ');
auto db = OpenDb(" ");
StringLists slists(db);
// Generate a list of random keys and values
@ -359,7 +412,7 @@ TEST_P(StringAppendOperatorTest, RandomMixGetAppend) {
}
TEST_P(StringAppendOperatorTest, BIGRandomMixGetAppend) {
auto db = OpenDb(' ');
auto db = OpenDb(" ");
StringLists slists(db);
// Generate a list of random keys and values
@ -411,7 +464,7 @@ TEST_P(StringAppendOperatorTest, BIGRandomMixGetAppend) {
TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
// Perform the following operations in limited scope
{
auto db = OpenDb('\n');
auto db = OpenDb("\n");
StringLists slists(db);
slists.Append("c", "asdasd");
@ -434,7 +487,7 @@ TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
// Reopen the database (the previous changes should persist / be remembered)
{
auto db = OpenDb('\n');
auto db = OpenDb("\n");
StringLists slists(db);
slists.Append("c", "bbnagnagsx");
@ -460,7 +513,7 @@ TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
// Reopen the database (the previous changes should persist / be remembered)
{
auto db = OpenDb('\n');
auto db = OpenDb("\n");
StringLists slists(db);
// All changes should be on disk. This will test VersionSet Get()
@ -478,7 +531,7 @@ TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) {
// Perform the following operations in limited scope
{
auto db = OpenDb('\n');
auto db = OpenDb("\n");
StringLists slists(db);
std::string a, b, c;
@ -517,7 +570,7 @@ TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) {
// Reopen the database (the previous changes should persist / be remembered)
{
auto db = OpenDb('\n');
auto db = OpenDb("\n");
StringLists slists(db);
std::string a, b, c;
@ -565,7 +618,7 @@ TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) {
}
TEST_P(StringAppendOperatorTest, SimpleTestNullDelimiter) {
auto db = OpenDb('\0');
auto db = OpenDb(std::string(1, '\0'));
StringLists slists(db);
slists.Append("k1", "v1");