Implemented StringAppendOperator and unit tests.

Summary:
Implemented the StringAppendOperator class (subclass of MergeOperator).
Found in utilities/merge_operators/string_append/stringappend.{h,cc}

It is a rocksdb Merge Operator that supports string/list concatenation
 with a configurable delimiter.

The tests are found in .../stringappend_test.cc. It implements a
 map : key -> (list of strings), with core operations Append(list_key,val)
 and Get(list_key).

Test Plan:
1. Navigate to your rocksdb repository
2. Execute: make stringappend_test  (to compile)
3. Execute: ./stringappend_test (to run the tests)
4. Execute: make all check (to test the ENTIRE rocksdb codebase / regression)

Reviewers: haobo, dhruba, zshao

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D10737
This commit is contained in:
Deon Nicholas 2013-05-10 10:40:10 -07:00
parent 4ca3c67bd3
commit accd3debbb
6 changed files with 568 additions and 1 deletions

View File

@ -61,7 +61,8 @@ TESTS = \
write_batch_test \ write_batch_test \
auto_roll_logger_test \ auto_roll_logger_test \
filelock_test \ filelock_test \
merge_test merge_test \
stringappend_test
TOOLS = \ TOOLS = \
sst_dump \ sst_dump \
@ -174,6 +175,9 @@ cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)
stringappend_test: utilities/merge_operators/string_append/stringappend_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) utilities/merge_operators/string_append/stringappend_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)
histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)

BIN
utilities/.DS_Store vendored Normal file

Binary file not shown.

BIN
utilities/merge_operators/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,57 @@
/**
* A MergeOperator for rocksdb/leveldb that implements string append.
* @author Deon Nicholas (dnicholas@fb.com)
* Copyright 2013 Facebook
*/
#include "stringappend.h"
#include <memory>
#include <assert.h>
#include "leveldb/slice.h"
#include "leveldb/merge_operator.h"
#include "utilities/merge_operators.h"
#include <iostream>
namespace leveldb {
// Builds an append operator that places `delim_char` between the elements
// it concatenates.
StringAppendOperator::StringAppendOperator(char delim_char) : delim_(delim_char) {}
// Merge step: writes into *new_value either `value` alone (when there is no
// existing value) or `existing_value + delim_ + value`.
//
// key and logger are not used by this operator.
void StringAppendOperator::Merge(const Slice& key,
                                 const Slice* existing_value,
                                 const Slice& value,
                                 std::string* new_value,
                                 Logger* logger) const {
  assert(new_value);

  // Start from an empty output so the result is built purely by appending.
  new_value->clear();

  if (existing_value == nullptr) {
    // First element of the list: no delimiter is emitted.
    new_value->assign(value.data(), value.size());
    return;
  }

  // General case: size the buffer once, then write the three pieces in order.
  new_value->reserve(existing_value->size() + 1 + value.size());
  new_value->append(existing_value->data(), existing_value->size());
  new_value->push_back(delim_);
  new_value->append(value.data(), value.size());
}
// Returns the fixed name string of this merge operator.
const char* StringAppendOperator::Name() const {
  static const char kOperatorName[] = "StringAppendOperator";
  return kOperatorName;
}
} // namespace leveldb

View File

@ -0,0 +1,31 @@
/**
* A MergeOperator for rocksdb/leveldb that implements string append.
* @author Deon Nicholas (dnicholas@fb.com)
* Copyright 2013 Facebook
*/
#include "leveldb/merge_operator.h"
#include "leveldb/slice.h"
namespace leveldb {
// A MergeOperator that concatenates string values, inserting a single
// delimiter character between consecutive elements.
class StringAppendOperator : public MergeOperator {
 public:
  /// Constructor: delim_char is the delimiter placed between elements.
  /// Marked explicit so that a plain char is never silently converted into
  /// a merge operator.
  explicit StringAppendOperator(char delim_char);

  /// Writes the merged result into *new_value. When existing_value is null
  /// the result is just `value`; otherwise it is the existing value, then
  /// the delimiter, then `value`.
  virtual void Merge(const Slice& key,
                     const Slice* existing_value,
                     const Slice& value,
                     std::string* new_value,
                     Logger* logger) const override;

  /// Returns the name of this operator.
  virtual const char* Name() const override;

 private:
  char delim_;  // The delimiter is inserted between elements
};
} // namespace leveldb

View File

@ -0,0 +1,475 @@
/**
* A persistent map : key -> (list of strings), using rocksdb merge.
* This file is a test-harness / use-case for the StringAppendOperator.
*
* @author Deon Nicholas (dnicholas@fb.com)
* Copyright 2013 Facebook, Inc.
*/
#include <iostream>
#include <map>
#include "leveldb/db.h"
#include "leveldb/merge_operator.h"
#include "utilities/merge_operators.h"
#include "utilities/merge_operators/string_append/stringappend.h"
#include "util/testharness.h"
#include "util/random.h"
using namespace leveldb;
namespace leveldb {
// File-system location of the database that every test case opens.
const std::string kDbName("/tmp/mergetestdb");
// OpenDb opens a (possibly new) rocksdb database at kDbName, configured to
// use `append_op` as its merge operator.
//
// Returns a shared_ptr owning the opened DB. If the open fails, the status
// is printed to stderr and the assertion fires; when assertions are compiled
// out, an empty shared_ptr is returned instead.
std::shared_ptr<DB> OpenDb(StringAppendOperator* append_op) {
  // Start from nullptr so a failed open can never leave an indeterminate
  // pointer behind, whatever DB::Open does with the out-parameter.
  DB* db = nullptr;
  Options options;
  options.create_if_missing = true;
  options.merge_operator = append_op;
  Status s = DB::Open(options, kDbName, &db);
  if (!s.ok()) {
    std::cerr << s.ToString() << std::endl;
    assert(false);
    return std::shared_ptr<DB>();  // Reached only when asserts are disabled
  }
  return std::shared_ptr<DB>(db);
}
/// StringLists represents a set of string-lists, each with a key-index.
/// Supports Append(list,string) and Get(list)
class StringLists {
public:
//Constructor: specifies the rocksdb db
StringLists(std::shared_ptr<DB> db)
: db_(db),
merge_option_(),
get_option_() {
assert(db);
}
// Append string val onto the list defined by key; return true on success
bool Append(const std::string& key, const std::string& val){
Slice valSlice(val.data(),val.size());
auto s = db_->Merge(merge_option_,key,valSlice);
if (s.ok()) {
return true;
} else {
std::cerr << "ERROR " << s.ToString() << std::endl;
return false;
}
}
// Returns the list of strings associated with key (or "" if does not exist)
bool Get(const std::string& key, std::string* const result){
assert(result != NULL); // we should have a place to store the result
auto s = db_->Get(get_option_, key, result);
if (s.ok()) {
return true;
}
// Either key does not exist, or there is some error.
*result = ""; // Always return empty string (just for convenvtion)
//NotFound is okay; just return empty (similar to std::map)
//But network or db errors, etc, should fail the test (or at least yell)
if (s.ToString() != "NotFound: "){
std::cerr << "ERROR " << s.ToString() << std::endl;
}
// Always return false if s.ok() was not true
return false;
}
private:
std::shared_ptr<DB> db_;
WriteOptions merge_option_;
ReadOptions get_option_;
};
// ---------------------------------------------------------------------------
// Test cases start here. Every TEST below names this empty class as its
// fixture.
// ---------------------------------------------------------------------------
class StringAppendOperatorTest {};
// Appends three values to one key with ',' as the delimiter and checks the
// joined result.
TEST(StringAppendOperatorTest, SimpleTest) {
  DestroyDB(kDbName, Options());  // Start this test with a fresh DB

  StringAppendOperator append_op(',');
  auto db = OpenDb(&append_op);
  StringLists slists(db);

  slists.Append("k1", "v1");
  slists.Append("k1", "v2");
  slists.Append("k1", "v3");

  std::string res;
  // Use the harness assertion so the check is not compiled out with NDEBUG.
  ASSERT_TRUE(slists.Get("k1", &res));
  ASSERT_EQ(res, "v1,v2,v3");
}
// Same scenario as the simple test, but with '|' as the delimiter.
TEST(StringAppendOperatorTest, SimpleDelimiterTest) {
  // Remove any database left behind by an earlier test.
  DestroyDB(kDbName, Options());

  StringAppendOperator pipe_joiner('|');
  auto database = OpenDb(&pipe_joiner);
  StringLists lists(database);

  // Append the three elements in order onto the same list.
  const char* const elements[] = {"v1", "v2", "v3"};
  for (const char* element : elements) {
    lists.Append("k1", element);
  }

  std::string joined;
  lists.Get("k1", &joined);
  ASSERT_EQ(joined, "v1|v2|v3");
}
// A list holding a single element must come back without any delimiter.
TEST(StringAppendOperatorTest, OneValueNoDelimiterTest) {
  // Remove any database left behind by an earlier test.
  DestroyDB(kDbName, Options());

  StringAppendOperator bang_joiner('!');
  auto database = OpenDb(&bang_joiner);
  StringLists lists(database);

  const std::string list_key = "random_key";
  lists.Append(list_key, "single_val");

  std::string fetched;
  lists.Get(list_key, &fetched);
  ASSERT_EQ(fetched, "single_val");
}
// Interleaves appends to three different keys ('\n' delimiter) and checks
// that each list only contains its own elements, in append order.
TEST(StringAppendOperatorTest, VariousKeys) {
  DestroyDB(kDbName, Options());  // Start this test with a fresh DB

  StringAppendOperator append_op('\n');
  auto db = OpenDb(&append_op);
  StringLists slists(db);

  slists.Append("c", "asdasd");
  slists.Append("a", "x");
  slists.Append("b", "y");
  slists.Append("a", "t");
  slists.Append("a", "r");
  slists.Append("b", "2");
  slists.Append("c", "asdasd");

  std::string a, b, c;
  // All three keys should have been found. Harness assertions are used so
  // the checks are not compiled out with NDEBUG.
  ASSERT_TRUE(slists.Get("a", &a));
  ASSERT_TRUE(slists.Get("b", &b));
  ASSERT_TRUE(slists.Get("c", &c));

  ASSERT_EQ(a, "x\nt\nr");
  ASSERT_EQ(b, "y\n2");
  ASSERT_EQ(c, "asdasd\nasdasd");
}
// Runs a short, deterministic sequence of pseudo-random Append/Get queries
// over a small set of keys and words, comparing every Get against an
// in-memory mirror of the expected lists.
TEST(StringAppendOperatorTest, RandomMixGetAppend) {
  // Remove any database left behind by an earlier test.
  DestroyDB(kDbName, Options());

  StringAppendOperator space_joiner(' ');
  auto database = OpenDb(&space_joiner);
  StringLists lists(database);

  // Pools from which the words and keys are drawn.
  const int kWordCount = 15;
  std::string vocabulary[] = {"sdasd", "triejf", "fnjsdfn", "dfjisdfsf",
                              "342839", "dsuha", "mabuais", "sadajsid",
                              "jf9834hf", "2d9j89", "dj9823jd", "a",
                              "dk02ed2dh", "$(jd4h984$(*", "mabz"};
  const int kKeyCount = 6;
  std::string list_names[] = {"dhaiusdhu", "denidw", "daisda", "keykey",
                              "muki", "shzassdianmd"};

  // In-memory mirror of what each list is expected to contain.
  std::map<std::string, std::string> expected;

  enum query_t { APPEND_OP, GET_OP, NUM_OPS };
  Random rng(1337);  // fixed seed: the query sequence is reproducible
  const int kNumQueries = 30;

  int remaining = kNumQueries;
  while (remaining-- > 0) {
    // Draw order matters for reproducibility: operation, then key, then word.
    const query_t op =
        static_cast<query_t>(rng.Uniform(static_cast<int>(NUM_OPS)));
    const std::string key =
        list_names[rng.Uniform(static_cast<int>(kKeyCount))];
    const std::string word =
        vocabulary[rng.Uniform(static_cast<int>(kWordCount))];

    switch (op) {
      case APPEND_OP: {
        // Append through rocksdb, then apply the same append to the mirror.
        lists.Append(key, word);
        std::string& mirror = expected[key];
        if (mirror.empty()) {
          mirror = word;
        } else {
          mirror += " " + word;
        }
        break;
      }
      case GET_OP: {
        // A key that was never appended to is expected to read as empty.
        std::string fetched;
        lists.Get(key, &fetched);
        ASSERT_EQ(fetched, expected[key]);
        break;
      }
      default:
        break;
    }
  }
}
// Larger variant of the random Append/Get test: 1000 deterministic queries
// with a different seed, checked against an in-memory mirror.
TEST(StringAppendOperatorTest, BIGRandomMixGetAppend) {
  // Remove any database left behind by an earlier test.
  DestroyDB(kDbName, Options());

  StringAppendOperator space_joiner(' ');
  auto database = OpenDb(&space_joiner);
  StringLists lists(database);

  // Pools from which the words and keys are drawn.
  const int kWordCount = 15;
  std::string vocabulary[] = {"sdasd", "triejf", "fnjsdfn", "dfjisdfsf",
                              "342839", "dsuha", "mabuais", "sadajsid",
                              "jf9834hf", "2d9j89", "dj9823jd", "a",
                              "dk02ed2dh", "$(jd4h984$(*", "mabz"};
  const int kKeyCount = 6;
  std::string list_names[] = {"dhaiusdhu", "denidw", "daisda", "keykey",
                              "muki", "shzassdianmd"};

  // In-memory mirror of what each list is expected to contain.
  std::map<std::string, std::string> expected;

  enum query_t { APPEND_OP, GET_OP, NUM_OPS };
  Random rng(9138204);  // fixed seed: the query sequence is reproducible
  const int kNumQueries = 1000;

  int remaining = kNumQueries;
  while (remaining-- > 0) {
    // Draw order matters for reproducibility: operation, then key, then word.
    const query_t op =
        static_cast<query_t>(rng.Uniform(static_cast<int>(NUM_OPS)));
    const std::string key =
        list_names[rng.Uniform(static_cast<int>(kKeyCount))];
    const std::string word =
        vocabulary[rng.Uniform(static_cast<int>(kWordCount))];

    switch (op) {
      case APPEND_OP: {
        // Append through rocksdb, then apply the same append to the mirror.
        lists.Append(key, word);
        std::string& mirror = expected[key];
        if (mirror.empty()) {
          mirror = word;
        } else {
          mirror += " " + word;
        }
        break;
      }
      case GET_OP: {
        // A key that was never appended to is expected to read as empty.
        std::string fetched;
        lists.Get(key, &fetched);
        ASSERT_EQ(fetched, expected[key]);
        break;
      }
      default:
        break;
    }
  }
}
// Checks that lists survive closing and reopening the database: appends made
// in a second session must extend what the first session wrote.
TEST(StringAppendOperatorTest, PersistentVariousKeys) {
  // Remove any database left behind by an earlier test.
  DestroyDB(kDbName, Options());

  StringAppendOperator newline_joiner('\n');

  // First session. The scope ends the session by dropping the database handle.
  {
    auto database = OpenDb(&newline_joiner);
    StringLists lists(database);

    lists.Append("c", "asdasd");
    lists.Append("a", "x");
    lists.Append("b", "y");
    lists.Append("a", "t");
    lists.Append("a", "r");
    lists.Append("b", "2");
    lists.Append("c", "asdasd");

    std::string list_a;
    std::string list_b;
    std::string list_c;
    lists.Get("a", &list_a);
    lists.Get("b", &list_b);
    lists.Get("c", &list_c);

    ASSERT_EQ(list_a, "x\nt\nr");
    ASSERT_EQ(list_b, "y\n2");
    ASSERT_EQ(list_c, "asdasd\nasdasd");
  }

  // Second session: reopen the database; earlier contents must still be there.
  {
    auto database = OpenDb(&newline_joiner);
    StringLists lists(database);

    lists.Append("c", "bbnagnagsx");
    lists.Append("a", "sa");
    lists.Append("b", "df");
    lists.Append("a", "gh");
    lists.Append("a", "jk");
    lists.Append("b", "l;");
    lists.Append("c", "rogosh");

    std::string list_a;
    std::string list_b;
    std::string list_c;
    lists.Get("a", &list_a);
    lists.Get("b", &list_b);
    lists.Get("c", &list_c);

    ASSERT_EQ(list_a, "x\nt\nr\nsa\ngh\njk");
    ASSERT_EQ(list_b, "y\n2\ndf\nl;");
    ASSERT_EQ(list_c, "asdasd\nasdasd\nbbnagnagsx\nrogosh");
  }
}
// Exercises the merge operator across Flush, CompactRange and a database
// reopen, checking the list contents after each step.
TEST(StringAppendOperatorTest, PersistentFlushAndCompaction) {
  DestroyDB(kDbName, Options());  // Start this test with a fresh DB

  StringAppendOperator append_op('\n');

  // Perform the following operations in limited scope
  {
    auto db = OpenDb(&append_op);
    StringLists slists(db);
    std::string a, b, c;

    // Append, Flush, Get
    slists.Append("c", "asdasd");
    db->Flush(leveldb::FlushOptions());
    // Harness assertions are used throughout so the success checks are not
    // compiled out with NDEBUG.
    ASSERT_TRUE(slists.Get("c", &c));
    ASSERT_EQ(c, "asdasd");

    // Append, Flush, Append, Get
    slists.Append("a", "x");
    slists.Append("b", "y");
    db->Flush(leveldb::FlushOptions());
    slists.Append("a", "t");
    slists.Append("a", "r");
    slists.Append("b", "2");

    ASSERT_TRUE(slists.Get("a", &a));
    ASSERT_EQ(a, "x\nt\nr");

    ASSERT_TRUE(slists.Get("b", &b));
    ASSERT_EQ(b, "y\n2");

    // Append, Get
    ASSERT_TRUE(slists.Append("c", "asdasd"));
    ASSERT_TRUE(slists.Append("b", "monkey"));

    slists.Get("a", &a);
    slists.Get("b", &b);
    slists.Get("c", &c);

    ASSERT_EQ(a, "x\nt\nr");
    ASSERT_EQ(b, "y\n2\nmonkey");
    ASSERT_EQ(c, "asdasd\nasdasd");
  }

  // Reopen the database (the previous changes should persist / be remembered)
  {
    auto db = OpenDb(&append_op);
    StringLists slists(db);
    std::string a, b, c;

    // Get (Quick check for persistence of previous database)
    slists.Get("a", &a);
    ASSERT_EQ(a, "x\nt\nr");

    // Append, Compact, Get
    slists.Append("c", "bbnagnagsx");
    slists.Append("a", "sa");
    slists.Append("b", "df");
    db->CompactRange(nullptr, nullptr);
    slists.Get("a", &a);
    slists.Get("b", &b);
    slists.Get("c", &c);
    ASSERT_EQ(a, "x\nt\nr\nsa");
    ASSERT_EQ(b, "y\n2\nmonkey\ndf");
    ASSERT_EQ(c, "asdasd\nasdasd\nbbnagnagsx");

    // Append, Get
    slists.Append("a", "gh");
    slists.Append("a", "jk");
    slists.Append("b", "l;");
    slists.Append("c", "rogosh");
    slists.Get("a", &a);
    slists.Get("b", &b);
    slists.Get("c", &c);
    ASSERT_EQ(a, "x\nt\nr\nsa\ngh\njk");
    ASSERT_EQ(b, "y\n2\nmonkey\ndf\nl;");
    ASSERT_EQ(c, "asdasd\nasdasd\nbbnagnagsx\nrogosh");

    // Compact, Get
    // Re-read every list after compaction; checking the locals without a
    // fresh Get would only re-test the values fetched before compaction.
    db->CompactRange(nullptr, nullptr);
    slists.Get("a", &a);
    slists.Get("b", &b);
    slists.Get("c", &c);
    ASSERT_EQ(a, "x\nt\nr\nsa\ngh\njk");
    ASSERT_EQ(b, "y\n2\nmonkey\ndf\nl;");
    ASSERT_EQ(c, "asdasd\nasdasd\nbbnagnagsx\nrogosh");

    // Append, Flush, Compact, Get
    slists.Append("b", "afcg");
    db->Flush(leveldb::FlushOptions());
    db->CompactRange(nullptr, nullptr);
    slists.Get("b", &b);
    ASSERT_EQ(b, "y\n2\nmonkey\ndf\nl;\nafcg");
  }
}
// Uses '\0' as the delimiter and checks that the embedded null characters
// are preserved in the merged value.
TEST(StringAppendOperatorTest, SimpleTestNullDelimiter) {
  DestroyDB(kDbName, Options());  // Start this test with a fresh DB

  StringAppendOperator append_op('\0');
  auto db = OpenDb(&append_op);
  StringLists slists(db);

  slists.Append("k1", "v1");
  slists.Append("k1", "v2");
  slists.Append("k1", "v3");

  std::string res;
  // Harness assertions are used so the checks are not compiled out with
  // NDEBUG.
  ASSERT_TRUE(slists.Get("k1", &res));

  // Construct the desired string. Building it from a C-string literal would
  // stop at the first '\0', so start with commas and overwrite them.
  std::string checker("v1,v2,v3");
  checker[2] = '\0';  // Use null delimiter instead of comma.
  checker[5] = '\0';
  ASSERT_TRUE(checker.size() == 8);  // Verify it is still the correct size

  // Check that the leveldb result string matches the desired string
  ASSERT_TRUE(res.size() == checker.size());
  ASSERT_EQ(res, checker);
}
} // namespace leveldb
// Test driver: runs every registered test case and uses the harness result
// as the process exit code instead of unconditionally reporting success.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}