JSON (Document) API sketch

Summary:
This is a rough sketch of our new document API. I would like to get some thoughts and comments on the high-level architecture and the API.

I didn't optimize for performance at all. I'm leaving some low-hanging fruit so that we can be happy when we fix it! :)

Currently, a bunch of features are not supported at all. Indexes can only be specified when creating the database. There is no query planner whatsoever. This will all be added in due time.

Test Plan: Added a simple unit test

Reviewers: haobo, yhchiang, dhruba, sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D18747
Igor Canadi 2014-07-10 09:31:42 -07:00
parent 222cf2555a
commit f0a8be253e
10 changed files with 1637 additions and 0 deletions


@@ -104,6 +104,7 @@ TESTS = \
stringappend_test \
ttl_test \
backupable_db_test \
document_db_test \
json_document_test \
version_edit_test \
version_set_test \
@@ -345,6 +346,9 @@ prefix_test: db/prefix_test.o $(LIBOBJECTS) $(TESTHARNESS)
backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
document_db_test: utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
json_document_test: utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)


@@ -236,6 +236,23 @@ void WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) {
WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family), key);
}
void WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
const SliceParts& key) {
WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
if (column_family_id == 0) {
b->rep_.push_back(static_cast<char>(kTypeDeletion));
} else {
b->rep_.push_back(static_cast<char>(kTypeColumnFamilyDeletion));
PutVarint32(&b->rep_, column_family_id);
}
PutLengthPrefixedSliceParts(&b->rep_, key);
}
void WriteBatch::Delete(ColumnFamilyHandle* column_family,
const SliceParts& key) {
WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family), key);
}
void WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
const Slice& key, const Slice& value) {
WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);


@@ -71,6 +71,9 @@ class WriteBatchInternal {
static void Put(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);
static void Delete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);
static void Delete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);


@@ -107,6 +107,7 @@ class Slice {
struct SliceParts {
SliceParts(const Slice* _parts, int _num_parts) :
parts(_parts), num_parts(_num_parts) { }
SliceParts() : parts(nullptr), num_parts(0) {}
const Slice* parts;
int num_parts;


@@ -67,6 +67,10 @@ class WriteBatch {
void Delete(ColumnFamilyHandle* column_family, const Slice& key);
void Delete(const Slice& key) { Delete(nullptr, key); }
// variant that takes SliceParts
void Delete(ColumnFamilyHandle* column_family, const SliceParts& key);
void Delete(const SliceParts& key) { Delete(nullptr, key); }
// Append a blob of arbitrary size to the records in this batch. The blob will
// be stored in the transaction log but not in any other file. In particular,
// it will not be persisted to the SST files. When iterating over this
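
For context, here is a minimal sketch of how the new SliceParts overload of Delete() can be used; the path and key fragments are made up, and error handling is abbreviated:

```cpp
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/write_batch.h"

int main() {
  rocksdb::DB* db;
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/writebatch_sliceparts_example", &db);
  if (!s.ok()) return 1;

  // Build a key out of two fragments; with the new overload the caller does
  // not need to concatenate them into a contiguous buffer first.
  rocksdb::Slice key_parts[2] = {rocksdb::Slice("user:"), rocksdb::Slice("42")};
  rocksdb::SliceParts key(key_parts, 2);

  rocksdb::WriteBatch batch;
  batch.Delete(key);  // deletes "user:42" from the default column family
  s = db->Write(rocksdb::WriteOptions(), &batch);

  delete db;
  return s.ok() ? 0 : 1;
}
```

This mirrors the existing Put(SliceParts) path and is what DocumentDB uses to delete keys assembled from multiple fragments.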


@@ -0,0 +1,149 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <vector>
#include "utilities/stackable_db.h"
#include "utilities/json_document.h"
#include "rocksdb/db.h"
namespace rocksdb {
// IMPORTANT: DocumentDB is a work in progress. It is unstable and we might
// change the API without warning. Talk to the RocksDB team before using this in
// production ;)
// DocumentDB is a layer on top of RocksDB that provides a very simple JSON API.
// When creating a DB, you specify a list of indexes you want to keep on your
// data. You can insert a JSON document into the DB, and it is automatically
// indexed. Every document added to the DB needs to have an "_id" field, which
// is automatically indexed and is a unique primary key. All other indexes are
// non-unique.
// NOTE: field names in the JSON are NOT allowed to start with '$' or
// contain '.'. We don't currently enforce this rule, but the DB will behave
// badly if it is broken.
// Cursor is what you get as a result of executing a query. To get all
// results from a query, call Next() on a Cursor while Valid() returns true.
class Cursor {
public:
Cursor() = default;
virtual ~Cursor() {}
virtual bool Valid() const = 0;
virtual void Next() = 0;
// The returned JSONDocument is valid only until the next Next() call
virtual const JSONDocument& document() const = 0;
virtual Status status() const = 0;
private:
// No copying allowed
Cursor(const Cursor&);
void operator=(const Cursor&);
};
struct DocumentDBOptions {
int background_threads = 4;
uint64_t memtable_size = 128 * 1024 * 1024; // 128 MB
uint64_t cache_size = 1 * 1024 * 1024 * 1024; // 1 GB
};
// TODO(icanadi) Add `JSONDocument* info` parameter to all calls that can be
// used by the caller to get more information about the call execution (number
// of dropped records, number of updated records, etc.)
class DocumentDB : public StackableDB {
public:
struct IndexDescriptor {
// Currently, you can only define an index on a single field. To specify an
// index on a field X, set the index description to the JSON document "{X: 1}".
// Currently the value needs to be 1, which means ascending.
// In the future, we plan to also support indexes on multiple keys, where
// you will be able to mix ascending (1) and descending (-1) sort orders.
JSONDocument* description;
std::string name;
};
// Open DocumentDB with specified indexes. The list of indexes has to be
// complete, i.e. include all indexes present in the DB, except the primary
// key index.
// Otherwise, Open() will return an error
static Status Open(const DocumentDBOptions& options, const std::string& name,
const std::vector<IndexDescriptor>& indexes,
DocumentDB** db, bool read_only = false);
explicit DocumentDB(DB* db) : StackableDB(db) {}
// Create a new index. It will stop all writes for the duration of the call.
// All current documents in the DB are scanned and corresponding index entries
// are created
virtual Status CreateIndex(const WriteOptions& write_options,
const IndexDescriptor& index) = 0;
// Drop an index. The client is responsible for making sure that the index is
// not being used by any currently executing queries.
virtual Status DropIndex(const std::string& name) = 0;
// Insert a document into the DB. The document needs to have a primary key "_id"
// which can either be a string or an integer. Otherwise the write will fail
// with InvalidArgument.
virtual Status Insert(const WriteOptions& options,
const JSONDocument& document) = 0;
// Deletes all documents matching a filter atomically
virtual Status Remove(const ReadOptions& read_options,
const WriteOptions& write_options,
const JSONDocument& query) = 0;
// Does this sequence of operations:
// 1. Find all documents matching a filter
// 2. For all documents, atomically:
// 2.1. apply the update operators
// 2.2. update the secondary indexes
//
// Currently, only the $set update operator is supported.
// Syntax is: {$set: {key1: value1, key2: value2, etc...}}
// This operator will change a document's key1 field to value1, key2 to
// value2, etc. New values will be set even if a document didn't have an entry
// for the specified key.
//
// You cannot change the primary key of a document.
//
// Update example: Update({id: {$gt: 5}, $index: id}, {$set: {enabled: true}})
virtual Status Update(const ReadOptions& read_options,
const WriteOptions& write_options,
const JSONDocument& filter,
const JSONDocument& updates) = 0;
// The query has to be an array in which every element is an operator. Currently,
// only the $filter operator is supported. The syntax of the $filter operator is:
// {$filter: {key1: condition1, key2: condition2, etc.}} where each condition can
// be either:
// 1) a single value, in which case the condition is an equality condition, or
// 2) a defined operator, like {$gt: 4}, which will match all documents whose
// key is greater than 4.
//
// Supported operators are:
// 1) $gt -- greater than
// 2) $gte -- greater than or equal
// 3) $lt -- less than
// 4) $lte -- less than or equal
// If you want the filter to use an index, you need to specify it like this:
// {$filter: {...(conditions)..., $index: index_name}}
//
// Example query:
// * [{$filter: {name: John, age: {$gte: 18}, $index: age}}]
// will return all Johns whose age is greater than or equal to 18, and it will
// use the index "age" to satisfy the query.
virtual Cursor* Query(const ReadOptions& read_options,
const JSONDocument& query) = 0;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
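
To make the intended call flow concrete, here is a minimal usage sketch pieced together from the header above and the unit test below; the path and document contents are made up, and error handling is abbreviated:

```cpp
#include <cstdio>
#include <memory>

#include "utilities/document_db.h"
#include "utilities/json_document.h"

int main() {
  using namespace rocksdb;

  DocumentDBOptions db_options;
  DocumentDB* db;

  // Open an empty DB with no secondary indexes (mirroring the unit test),
  // then create an index on "name" on the fly.
  Status s = DocumentDB::Open(db_options, "/tmp/document_db_example", {}, &db);
  if (!s.ok()) return 1;

  // The caller owns the index description (the unit test deletes it after use).
  DocumentDB::IndexDescriptor name_index;
  std::unique_ptr<JSONDocument> description(
      JSONDocument::ParseJSON("{\"name\": 1}"));
  name_index.description = description.get();
  name_index.name = "name_index";
  s = db->CreateIndex(WriteOptions(), name_index);

  // Every document needs a unique "_id" primary key.
  std::unique_ptr<JSONDocument> doc(
      JSONDocument::ParseJSON("{\"_id\": 1, \"name\": \"One\"}"));
  s = db->Insert(WriteOptions(), *doc);

  // Query through the secondary index and walk the cursor.
  std::unique_ptr<JSONDocument> query(JSONDocument::ParseJSON(
      "[{\"$filter\": {\"name\": \"One\", \"$index\": \"name_index\"}}]"));
  std::unique_ptr<Cursor> cursor(db->Query(ReadOptions(), *query));
  for (; cursor->Valid(); cursor->Next()) {
    printf("%s\n", cursor->document().DebugString().c_str());
  }

  delete db;
  return 0;
}
```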


@@ -99,6 +99,8 @@ class JSONDocument {
bool operator==(const JSONDocument& rhs) const;
std::string DebugString() const;
private:
class ItemsIteratorGenerator;

File diff suppressed because it is too large


@@ -0,0 +1,262 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include <algorithm>
#include "utilities/json_document.h"
#include "utilities/document_db.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace rocksdb {
class DocumentDBTest {
public:
DocumentDBTest() {
dbname_ = test::TmpDir() + "/document_db_test";
DestroyDB(dbname_, Options());
}
~DocumentDBTest() {
delete db_;
DestroyDB(dbname_, Options());
}
void AssertCursorIDs(Cursor* cursor, std::vector<int64_t> expected) {
std::vector<int64_t> got;
while (cursor->Valid()) {
ASSERT_TRUE(cursor->Valid());
ASSERT_TRUE(cursor->document().Contains("_id"));
got.push_back(cursor->document()["_id"].GetInt64());
cursor->Next();
}
std::sort(expected.begin(), expected.end());
std::sort(got.begin(), got.end());
ASSERT_TRUE(got == expected);
}
// converts ' to ", so that we don't have to escape " all over the place
std::string ConvertQuotes(const std::string& input) {
std::string output;
for (auto x : input) {
if (x == '\'') {
output.push_back('\"');
} else {
output.push_back(x);
}
}
return output;
}
void CreateIndexes(std::vector<DocumentDB::IndexDescriptor> indexes) {
for (auto i : indexes) {
ASSERT_OK(db_->CreateIndex(WriteOptions(), i));
}
}
JSONDocument* Parse(const std::string doc) {
return JSONDocument::ParseJSON(ConvertQuotes(doc).c_str());
}
std::string dbname_;
DocumentDB* db_;
};
TEST(DocumentDBTest, SimpleQueryTest) {
DocumentDBOptions options;
DocumentDB::IndexDescriptor index;
index.description = Parse("{'name': 1}");
index.name = "name_index";
ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
CreateIndexes({index});
delete db_;
// now there is an index present
ASSERT_OK(DocumentDB::Open(options, dbname_, {index}, &db_));
delete index.description;
std::vector<std::string> json_objects = {
"{'_id': 1, 'name': 'One'}", "{'_id': 2, 'name': 'Two'}",
"{'_id': 3, 'name': 'Three'}", "{'_id': 4, 'name': 'Four'}"};
for (auto& json : json_objects) {
std::unique_ptr<JSONDocument> document(Parse(json));
ASSERT_TRUE(document.get() != nullptr);
ASSERT_OK(db_->Insert(WriteOptions(), *document));
}
// inserting a document with an existing primary key should fail
{
std::unique_ptr<JSONDocument> document(Parse(json_objects[0]));
ASSERT_TRUE(document.get() != nullptr);
Status s = db_->Insert(WriteOptions(), *document);
ASSERT_TRUE(s.IsInvalidArgument());
}
// find equal to "Two"
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'name': 'Two', '$index': 'name_index'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {2});
}
// find less than "Three"
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'name': {'$lt': 'Three'}, '$index': "
"'name_index'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {1, 4});
}
// find less than "Three" without index
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'name': {'$lt': 'Three'} }}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {1, 4});
}
// remove less or equal to "Three"
{
std::unique_ptr<JSONDocument> query(
Parse("{'name': {'$lte': 'Three'}, '$index': 'name_index'}"));
ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query));
}
// find all -- only "Two" left, everything else should be deleted
{
std::unique_ptr<JSONDocument> query(Parse("[]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {2});
}
}
TEST(DocumentDBTest, ComplexQueryTest) {
DocumentDBOptions options;
DocumentDB::IndexDescriptor priority_index;
priority_index.description = Parse("{'priority': 1}");
priority_index.name = "priority";
DocumentDB::IndexDescriptor job_name_index;
job_name_index.description = Parse("{'job_name': 1}");
job_name_index.name = "job_name";
DocumentDB::IndexDescriptor progress_index;
progress_index.description = Parse("{'progress': 1}");
progress_index.name = "progress";
ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
CreateIndexes({priority_index, progress_index});
delete priority_index.description;
delete progress_index.description;
std::vector<std::string> json_objects = {
"{'_id': 1, 'job_name': 'play', 'priority': 10, 'progress': 14.2}",
"{'_id': 2, 'job_name': 'white', 'priority': 2, 'progress': 45.1}",
"{'_id': 3, 'job_name': 'straw', 'priority': 5, 'progress': 83.2}",
"{'_id': 4, 'job_name': 'temporary', 'priority': 3, 'progress': 14.9}",
"{'_id': 5, 'job_name': 'white', 'priority': 4, 'progress': 44.2}",
"{'_id': 6, 'job_name': 'tea', 'priority': 1, 'progress': 12.4}",
"{'_id': 7, 'job_name': 'delete', 'priority': 2, 'progress': 77.54}",
"{'_id': 8, 'job_name': 'rock', 'priority': 3, 'progress': 93.24}",
"{'_id': 9, 'job_name': 'steady', 'priority': 3, 'progress': 9.1}",
"{'_id': 10, 'job_name': 'white', 'priority': 1, 'progress': 61.4}",
"{'_id': 11, 'job_name': 'who', 'priority': 4, 'progress': 39.41}", };
// add an index on the fly!
CreateIndexes({job_name_index});
delete job_name_index.description;
for (auto& json : json_objects) {
std::unique_ptr<JSONDocument> document(Parse(json));
ASSERT_TRUE(document != nullptr);
ASSERT_OK(db_->Insert(WriteOptions(), *document));
}
// 2 < priority < 4 AND progress > 10.0, index priority
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
"10.0}, '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 8});
}
// 2 < priority < 4 AND progress > 10.0, index progress
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
"10.0}, '$index': 'progress'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 8});
}
// job_name == 'white' AND priority >= 2, index job_name
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'job_name': 'white', 'priority': {'$gte': "
"2}, '$index': 'job_name'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {2, 5});
}
// 35.0 <= progress < 65.5, index progress
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'progress': {'$gt': 5.0, '$gte': 35.0, '$lt': 65.5}, "
"'$index': 'progress'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {2, 5, 10, 11});
}
// 2 < priority <= 4, index priority
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$gt': 2, '$lt': 8, '$lte': 4}, "
"'$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 5, 8, 9, 11});
}
// Delete all documents whose progress is greater than 50%
{
std::unique_ptr<JSONDocument> query(
Parse("{'progress': {'$gt': 50.0}, '$index': 'progress'}"));
ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query));
}
// 2 < priority < 6, index priority
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$gt': 2, '$lt': 6}, "
"'$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 5, 9, 11});
}
// update set priority to 10 where job_name is 'white'
{
std::unique_ptr<JSONDocument> query(Parse("{'job_name': 'white'}"));
std::unique_ptr<JSONDocument> update(Parse("{'$set': {'priority': 10}}"));
ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update));
}
// 4 < priority
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'priority': {'$gt': 4}, '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
ASSERT_OK(cursor->status());
AssertCursorIDs(cursor.get(), {1, 2, 5});
}
Status s = db_->DropIndex("doesnt-exist");
ASSERT_TRUE(!s.ok());
ASSERT_OK(db_->DropIndex("priority"));
}
} // namespace rocksdb
int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); }


@@ -6,6 +6,8 @@
#include "utilities/json_document.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <cassert>
#include <string>
#include <map>
@@ -267,6 +269,58 @@ bool JSONDocument::operator==(const JSONDocument& rhs) const {
return false;
}
std::string JSONDocument::DebugString() const {
std::string ret;
switch (type_) {
case kNull:
ret = "null";
break;
case kArray:
ret = "[";
for (size_t i = 0; i < data_.a.size(); ++i) {
if (i) {
ret += ", ";
}
ret += data_.a[i]->DebugString();
}
ret += "]";
break;
case kBool:
ret = data_.b ? "true" : "false";
break;
case kDouble: {
char buf[100];
snprintf(buf, sizeof(buf), "%lf", data_.d);
ret = buf;
break;
}
case kInt64: {
char buf[100];
snprintf(buf, sizeof(buf), "%" PRIi64, data_.i);
ret = buf;
break;
}
case kObject: {
bool first = true;
ret = "{";
for (const auto& iter : data_.o) {
ret += first ? "" : ", ";
first = false;
ret += iter.first + ": ";
ret += iter.second->DebugString();
}
ret += "}";
break;
}
case kString:
ret = "\"" + data_.s + "\"";
break;
default:
assert(false);
}
return ret;
}
JSONDocument::ItemsIteratorGenerator JSONDocument::Items() const {
assert(type_ == kObject);
return data_.o;
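
As a small illustration of the output format implemented above, a hypothetical caller might do the following; the document contents are made up, and the exact key order depends on JSONDocument's internal object container:

```cpp
#include <cstdio>
#include <memory>

#include "utilities/json_document.h"

int main() {
  // Parse a small document and dump it with the new DebugString() method.
  std::unique_ptr<rocksdb::JSONDocument> doc(rocksdb::JSONDocument::ParseJSON(
      "{\"_id\": 3, \"done\": false, \"name\": \"Three\", \"progress\": 83.2}"));
  if (doc == nullptr) return 1;

  // Based on the switch above, this prints something like:
  //   {_id: 3, done: false, name: "Three", progress: 83.200000}
  // (object keys unquoted, strings quoted, doubles formatted with "%lf").
  printf("%s\n", doc->DebugString().c_str());
  return 0;
}
```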