From 1fb2e274c50ba4f9d49581fd5eaeb39baa3d2df3 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Thu, 10 Jan 2019 13:23:35 -0800 Subject: [PATCH] Remove some components (#4101) Summary: Remove some components that we never heard people using them. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4101 Differential Revision: D8825431 Pulled By: siying fbshipit-source-id: 97a12ad3cad4ab12c82741a5ba49669aaa854180 --- CMakeLists.txt | 18 - HISTORY.md | 1 + Makefile | 37 +- TARGETS | 40 - include/rocksdb/utilities/date_tiered_db.h | 108 -- include/rocksdb/utilities/document_db.h | 149 --- include/rocksdb/utilities/geo_db.h | 114 -- include/rocksdb/utilities/json_document.h | 195 --- include/rocksdb/utilities/spatial_db.h | 261 ---- src.mk | 21 - utilities/col_buf_decoder.cc | 240 ---- utilities/col_buf_decoder.h | 119 -- utilities/col_buf_encoder.cc | 210 --- utilities/col_buf_encoder.h | 219 ---- utilities/column_aware_encoding_exp.cc | 176 --- utilities/column_aware_encoding_test.cc | 254 ---- utilities/column_aware_encoding_util.cc | 491 ------- utilities/column_aware_encoding_util.h | 81 -- utilities/date_tiered/date_tiered_db_impl.cc | 399 ------ utilities/date_tiered/date_tiered_db_impl.h | 93 -- utilities/date_tiered/date_tiered_test.cc | 469 ------- utilities/document/document_db.cc | 1207 ------------------ utilities/document/document_db_test.cc | 338 ----- utilities/document/json_document.cc | 610 --------- utilities/document/json_document_builder.cc | 120 -- utilities/document/json_document_test.cc | 343 ----- utilities/geodb/geodb_impl.cc | 478 ------- utilities/geodb/geodb_impl.h | 185 --- utilities/geodb/geodb_test.cc | 201 --- utilities/redis/README | 14 - utilities/redis/redis_list_exception.h | 22 - utilities/redis/redis_list_iterator.h | 309 ----- utilities/redis/redis_lists.cc | 552 -------- utilities/redis/redis_lists.h | 108 -- utilities/redis/redis_lists_test.cc | 894 ------------- utilities/spatialdb/spatial_db.cc | 919 ------------- utilities/spatialdb/spatial_db_test.cc | 307 ----- utilities/spatialdb/utils.h | 95 -- 38 files changed, 4 insertions(+), 10393 deletions(-) delete mode 100644 include/rocksdb/utilities/date_tiered_db.h delete mode 100644 include/rocksdb/utilities/document_db.h delete mode 100644 include/rocksdb/utilities/geo_db.h delete mode 100644 include/rocksdb/utilities/json_document.h delete mode 100644 include/rocksdb/utilities/spatial_db.h delete mode 100644 utilities/col_buf_decoder.cc delete mode 100644 utilities/col_buf_decoder.h delete mode 100644 utilities/col_buf_encoder.cc delete mode 100644 utilities/col_buf_encoder.h delete mode 100644 utilities/column_aware_encoding_exp.cc delete mode 100644 utilities/column_aware_encoding_test.cc delete mode 100644 utilities/column_aware_encoding_util.cc delete mode 100644 utilities/column_aware_encoding_util.h delete mode 100644 utilities/date_tiered/date_tiered_db_impl.cc delete mode 100644 utilities/date_tiered/date_tiered_db_impl.h delete mode 100644 utilities/date_tiered/date_tiered_test.cc delete mode 100644 utilities/document/document_db.cc delete mode 100644 utilities/document/document_db_test.cc delete mode 100644 utilities/document/json_document.cc delete mode 100644 utilities/document/json_document_builder.cc delete mode 100644 utilities/document/json_document_test.cc delete mode 100644 utilities/geodb/geodb_impl.cc delete mode 100644 utilities/geodb/geodb_impl.h delete mode 100644 utilities/geodb/geodb_test.cc delete mode 100644 utilities/redis/README delete mode 100644 utilities/redis/redis_list_exception.h delete mode 100644 utilities/redis/redis_list_iterator.h delete mode 100644 utilities/redis/redis_lists.cc delete mode 100644 utilities/redis/redis_lists.h delete mode 100644 utilities/redis/redis_lists_test.cc delete mode 100644 utilities/spatialdb/spatial_db.cc delete mode 100644 utilities/spatialdb/spatial_db_test.cc delete mode 100644 utilities/spatialdb/utils.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5167ca9b2..b38ab3d93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -643,18 +643,10 @@ set(SOURCES utilities/cassandra/format.cc utilities/cassandra/merge_operator.cc utilities/checkpoint/checkpoint_impl.cc - utilities/col_buf_decoder.cc - utilities/col_buf_encoder.cc - utilities/column_aware_encoding_util.cc utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc - utilities/date_tiered/date_tiered_db_impl.cc utilities/debug.cc - utilities/document/document_db.cc - utilities/document/json_document.cc - utilities/document/json_document_builder.cc utilities/env_mirror.cc utilities/env_timed.cc - utilities/geodb/geodb_impl.cc utilities/leveldb_options/leveldb_options.cc utilities/lua/rocks_lua_compaction_filter.cc utilities/memory/memory_util.cc @@ -671,9 +663,7 @@ set(SOURCES utilities/persistent_cache/block_cache_tier_metadata.cc utilities/persistent_cache/persistent_cache_tier.cc utilities/persistent_cache/volatile_tier_impl.cc - utilities/redis/redis_lists.cc utilities/simulator_cache/sim_cache.cc - utilities/spatialdb/spatial_db.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc utilities/trace/file_trace_reader_writer.cc utilities/transactions/optimistic_transaction_db_impl.cc @@ -975,11 +965,6 @@ if(WITH_TESTS) utilities/cassandra/cassandra_row_merge_test.cc utilities/cassandra/cassandra_serialize_test.cc utilities/checkpoint/checkpoint_test.cc - utilities/column_aware_encoding_test.cc - utilities/date_tiered/date_tiered_test.cc - utilities/document/document_db_test.cc - utilities/document/json_document_test.cc - utilities/geodb/geodb_test.cc utilities/lua/rocks_lua_test.cc utilities/memory/memory_test.cc utilities/merge_operators/string_append/stringappend_test.cc @@ -988,8 +973,6 @@ if(WITH_TESTS) utilities/options/options_util_test.cc utilities/persistent_cache/hash_table_test.cc utilities/persistent_cache/persistent_cache_test.cc - utilities/redis/redis_lists_test.cc - utilities/spatialdb/spatial_db_test.cc utilities/simulator_cache/sim_cache_test.cc utilities/table_properties_collectors/compact_on_deletion_collector_test.cc utilities/transactions/optimistic_transaction_test.cc @@ -1009,7 +992,6 @@ if(WITH_TESTS) db/range_del_aggregator_bench.cc tools/db_bench.cc table/table_reader_bench.cc - utilities/column_aware_encoding_exp.cc utilities/persistent_cache/hash_table_bench.cc) add_library(testharness OBJECT util/testharness.cc) foreach(sourcefile ${BENCHMARKS}) diff --git a/HISTORY.md b/HISTORY.md index 63f3a5d9c..233f5f1b1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ ### Public API Change * Transaction::GetForUpdate is extended with a do_validate parameter with default value of true. If false it skips validating the snapshot before doing the read. Similarly ::Merge, ::Put, ::Delete, and ::SingleDelete are extended with assume_tracked with default value of false. If true it indicates that call is assumed to be after a ::GetForUpdate. * `TableProperties::num_entries` and `TableProperties::num_deletions` now also account for number of range tombstones. +* Remove geodb, spatial_db, document_db, json_document, date_tiered_db, and redis_lists. ### Bug Fixes * Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls. diff --git a/Makefile b/Makefile index 0c6c2c463..6b5f2e07c 100644 --- a/Makefile +++ b/Makefile @@ -403,7 +403,7 @@ BENCHTOOLOBJECTS = $(BENCH_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) ANALYZETOOLOBJECTS = $(ANALYZER_LIB_SOURCES:.cc=.o) -EXPOBJECTS = $(EXP_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) +EXPOBJECTS = $(LIBOBJECTS) $(TESTUTIL) TESTS = \ db_basic_test \ @@ -482,7 +482,6 @@ TESTS = \ merger_test \ util_merge_operators_test \ options_file_test \ - redis_test \ reduce_levels_test \ plain_table_db_test \ comparator_db_test \ @@ -496,12 +495,8 @@ TESTS = \ cassandra_row_merge_test \ cassandra_serialize_test \ ttl_test \ - date_tiered_test \ backupable_db_test \ - document_db_test \ - json_document_test \ sim_cache_test \ - spatial_db_test \ version_edit_test \ version_set_test \ compaction_picker_test \ @@ -513,7 +508,6 @@ TESTS = \ deletefile_test \ obsolete_files_test \ table_test \ - geodb_test \ delete_scheduler_test \ options_test \ options_settable_test \ @@ -530,7 +524,6 @@ TESTS = \ compaction_job_test \ thread_list_test \ sst_dump_test \ - column_aware_encoding_test \ compact_files_test \ optimistic_transaction_test \ write_callback_test \ @@ -604,7 +597,7 @@ TEST_LIBS = \ librocksdb_env_basic_test.a # TODO: add back forward_iterator_bench, after making it build in all environemnts. -BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench column_aware_encoding_exp persistent_cache_bench range_del_aggregator_bench +BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench persistent_cache_bench range_del_aggregator_bench # if user didn't config LIBNAME, set the default ifeq ($(LIBNAME),) @@ -1153,9 +1146,6 @@ cassandra_row_merge_test: utilities/cassandra/cassandra_row_merge_test.o utiliti cassandra_serialize_test: utilities/cassandra/cassandra_serialize_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -redis_test: utilities/redis/redis_lists_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_LINK) - hash_table_test: utilities/persistent_cache/hash_table_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) @@ -1294,18 +1284,9 @@ backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TE checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -document_db_test: utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_LINK) - -json_document_test: utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_LINK) - sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -spatial_db_test: utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_LINK) - env_mirror_test: utilities/env_mirror_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) @@ -1323,9 +1304,6 @@ object_registry_test: utilities/object_registry_test.o $(LIBOBJECTS) $(TESTHARNE ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -date_tiered_test: utilities/date_tiered/date_tiered_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_LINK) - write_batch_with_index_test: utilities/write_batch_with_index/write_batch_with_index_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) @@ -1452,9 +1430,6 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS) obsolete_files_test: db/obsolete_files_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_LINK) - rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS) $(AM_LINK) @@ -1503,9 +1478,6 @@ timer_queue_test: util/timer_queue_test.o $(LIBOBJECTS) $(TESTHARNESS) sst_dump_test: tools/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -column_aware_encoding_test: utilities/column_aware_encoding_test.o $(TESTHARNESS) $(EXPOBJECTS) - $(AM_LINK) - optimistic_transaction_test: utilities/transactions/optimistic_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) @@ -1545,9 +1517,6 @@ sst_dump: tools/sst_dump.o $(LIBOBJECTS) blob_dump: tools/blob_dump.o $(LIBOBJECTS) $(AM_LINK) -column_aware_encoding_exp: utilities/column_aware_encoding_exp.o $(EXPOBJECTS) - $(AM_LINK) - repair_test: db/repair_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) @@ -1973,7 +1942,7 @@ endif # Source files dependencies detection # --------------------------------------------------------------------------- -all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(EXP_LIB_SOURCES) $(ANALYZER_LIB_SOURCES) +all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(ANALYZER_LIB_SOURCES) DEPFILES = $(all_sources:.cc=.cc.d) # Add proper dependency support so changing a .h file forces a .cc file to diff --git a/TARGETS b/TARGETS index 7df9e3848..20a004598 100644 --- a/TARGETS +++ b/TARGETS @@ -255,14 +255,9 @@ cpp_library( "utilities/checkpoint/checkpoint_impl.cc", "utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc", "utilities/convenience/info_log_finder.cc", - "utilities/date_tiered/date_tiered_db_impl.cc", "utilities/debug.cc", - "utilities/document/document_db.cc", - "utilities/document/json_document.cc", - "utilities/document/json_document_builder.cc", "utilities/env_mirror.cc", "utilities/env_timed.cc", - "utilities/geodb/geodb_impl.cc", "utilities/leveldb_options/leveldb_options.cc", "utilities/lua/rocks_lua_compaction_filter.cc", "utilities/memory/memory_util.cc", @@ -279,9 +274,7 @@ cpp_library( "utilities/persistent_cache/block_cache_tier_metadata.cc", "utilities/persistent_cache/persistent_cache_tier.cc", "utilities/persistent_cache/volatile_tier_impl.cc", - "utilities/redis/redis_lists.cc", "utilities/simulator_cache/sim_cache.cc", - "utilities/spatialdb/spatial_db.cc", "utilities/table_properties_collectors/compact_on_deletion_collector.cc", "utilities/trace/file_trace_reader_writer.cc", "utilities/transactions/optimistic_transaction.cc", @@ -319,9 +312,6 @@ cpp_library( "util/testharness.cc", "util/testutil.cc", "utilities/cassandra/test_utils.cc", - "utilities/col_buf_decoder.cc", - "utilities/col_buf_encoder.cc", - "utilities/column_aware_encoding_util.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, @@ -444,11 +434,6 @@ ROCKS_TESTS = [ "util/coding_test.cc", "serial", ], - [ - "column_aware_encoding_test", - "utilities/column_aware_encoding_test.cc", - "serial", - ], [ "column_family_test", "db/column_family_test.cc", @@ -519,11 +504,6 @@ ROCKS_TESTS = [ "table/data_block_hash_index_test.cc", "serial", ], - [ - "date_tiered_test", - "utilities/date_tiered/date_tiered_test.cc", - "serial", - ], [ "db_basic_test", "db/db_basic_test.cc", @@ -684,11 +664,6 @@ ROCKS_TESTS = [ "db/deletefile_test.cc", "serial", ], - [ - "document_db_test", - "utilities/document/document_db_test.cc", - "serial", - ], [ "dynamic_bloom_test", "util/dynamic_bloom_test.cc", @@ -764,11 +739,6 @@ ROCKS_TESTS = [ "table/full_filter_block_test.cc", "serial", ], - [ - "geodb_test", - "utilities/geodb/geodb_test.cc", - "serial", - ], [ "hash_table_test", "utilities/persistent_cache/hash_table_test.cc", @@ -799,11 +769,6 @@ ROCKS_TESTS = [ "monitoring/iostats_context_test.cc", "serial", ], - [ - "json_document_test", - "utilities/document/json_document_test.cc", - "serial", - ], [ "ldb_cmd_test", "tools/ldb_cmd_test.cc", @@ -969,11 +934,6 @@ ROCKS_TESTS = [ "util/slice_transform_test.cc", "serial", ], - [ - "spatial_db_test", - "utilities/spatialdb/spatial_db_test.cc", - "serial", - ], [ "sst_dump_test", "tools/sst_dump_test.cc", diff --git a/include/rocksdb/utilities/date_tiered_db.h b/include/rocksdb/utilities/date_tiered_db.h deleted file mode 100644 index f259b05a8..000000000 --- a/include/rocksdb/utilities/date_tiered_db.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#ifndef ROCKSDB_LITE - -#include -#include -#include - -#include "rocksdb/db.h" - -namespace rocksdb { - -// Date tiered database is a wrapper of DB that implements -// a simplified DateTieredCompactionStrategy by using multiple column famillies -// as time windows. -// -// DateTieredDB provides an interface similar to DB, but it assumes that user -// provides keys with last 8 bytes encoded as timestamp in seconds. DateTieredDB -// is assigned with a TTL to declare when data should be deleted. -// -// DateTieredDB hides column families layer from standard RocksDB instance. It -// uses multiple column families to manage time series data, each containing a -// specific range of time. Column families are named by its maximum possible -// timestamp. A column family is created automatically when data newer than -// latest timestamp of all existing column families. The time range of a column -// family is configurable by `column_family_interval`. By doing this, we -// guarantee that compaction will only happen in a column family. -// -// DateTieredDB is assigned with a TTL. When all data in a column family are -// expired (CF_Timestamp <= CUR_Timestamp - TTL), we directly drop the whole -// column family. -// -// TODO(jhli): This is only a simplified version of DTCS. In a complete DTCS, -// time windows can be merged over time, so that older time windows will have -// larger time range. Also, compaction are executed only for adjacent SST files -// to guarantee there is no time overlap between SST files. - -class DateTieredDB { - public: - // Open a DateTieredDB whose name is `dbname`. - // Similar to DB::Open(), created database object is stored in dbptr. - // - // Two parameters can be configured: `ttl` to specify the length of time that - // keys should exist in the database, and `column_family_interval` to specify - // the time range of a column family interval. - // - // Open a read only database if read only is set as true. - // TODO(jhli): Should use an option object that includes ttl and - // column_family_interval. - static Status Open(const Options& options, const std::string& dbname, - DateTieredDB** dbptr, int64_t ttl, - int64_t column_family_interval, bool read_only = false); - - explicit DateTieredDB() {} - - virtual ~DateTieredDB() {} - - // Wrapper for Put method. Similar to DB::Put(), but column family to be - // inserted is decided by the timestamp in keys, i.e. the last 8 bytes of user - // key. If key is already obsolete, it will not be inserted. - // - // When client put a key value pair in DateTieredDB, it assumes last 8 bytes - // of keys are encoded as timestamp. Timestamp is a 64-bit signed integer - // encoded as the number of seconds since 1970-01-01 00:00:00 (UTC) (Same as - // Env::GetCurrentTime()). Timestamp should be encoded in big endian. - virtual Status Put(const WriteOptions& options, const Slice& key, - const Slice& val) = 0; - - // Wrapper for Get method. Similar to DB::Get() but column family is decided - // by timestamp in keys. If key is already obsolete, it will not be found. - virtual Status Get(const ReadOptions& options, const Slice& key, - std::string* value) = 0; - - // Wrapper for Delete method. Similar to DB::Delete() but column family is - // decided by timestamp in keys. If key is already obsolete, return NotFound - // status. - virtual Status Delete(const WriteOptions& options, const Slice& key) = 0; - - // Wrapper for KeyMayExist method. Similar to DB::KeyMayExist() but column - // family is decided by timestamp in keys. Return false when key is already - // obsolete. - virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, - std::string* value, bool* value_found = nullptr) = 0; - - // Wrapper for Merge method. Similar to DB::Merge() but column family is - // decided by timestamp in keys. - virtual Status Merge(const WriteOptions& options, const Slice& key, - const Slice& value) = 0; - - // Create an iterator that hides low level details. This iterator internally - // merge results from all active time series column families. Note that - // column families are not deleted until all data are obsolete, so this - // iterator can possibly access obsolete key value pairs. - virtual Iterator* NewIterator(const ReadOptions& opts) = 0; - - // Explicitly drop column families in which all keys are obsolete. This - // process is also inplicitly done in Put() operation. - virtual Status DropObsoleteColumnFamilies() = 0; - - static const uint64_t kTSLength = sizeof(int64_t); // size of timestamp -}; - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/include/rocksdb/utilities/document_db.h b/include/rocksdb/utilities/document_db.h deleted file mode 100644 index 3668a50b9..000000000 --- a/include/rocksdb/utilities/document_db.h +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#ifndef ROCKSDB_LITE - -#include -#include - -#include "rocksdb/utilities/stackable_db.h" -#include "rocksdb/utilities/json_document.h" -#include "rocksdb/db.h" - -namespace rocksdb { - -// IMPORTANT: DocumentDB is a work in progress. It is unstable and we might -// change the API without warning. Talk to RocksDB team before using this in -// production ;) - -// DocumentDB is a layer on top of RocksDB that provides a very simple JSON API. -// When creating a DB, you specify a list of indexes you want to keep on your -// data. You can insert a JSON document to the DB, which is automatically -// indexed. Every document added to the DB needs to have "_id" field which is -// automatically indexed and is an unique primary key. All other indexes are -// non-unique. - -// NOTE: field names in the JSON are NOT allowed to start with '$' or -// contain '.'. We don't currently enforce that rule, but will start behaving -// badly. - -// Cursor is what you get as a result of executing query. To get all -// results from a query, call Next() on a Cursor while Valid() returns true -class Cursor { - public: - Cursor() = default; - virtual ~Cursor() {} - - virtual bool Valid() const = 0; - virtual void Next() = 0; - // Lifecycle of the returned JSONDocument is until the next Next() call - virtual const JSONDocument& document() const = 0; - virtual Status status() const = 0; - - private: - // No copying allowed - Cursor(const Cursor&); - void operator=(const Cursor&); -}; - -struct DocumentDBOptions { - int background_threads = 4; - uint64_t memtable_size = 128 * 1024 * 1024; // 128 MB - uint64_t cache_size = 1 * 1024 * 1024 * 1024; // 1 GB -}; - -// TODO(icanadi) Add `JSONDocument* info` parameter to all calls that can be -// used by the caller to get more information about the call execution (number -// of dropped records, number of updated records, etc.) -class DocumentDB : public StackableDB { - public: - struct IndexDescriptor { - // Currently, you can only define an index on a single field. To specify an - // index on a field X, set index description to JSON "{X: 1}" - // Currently the value needs to be 1, which means ascending. - // In the future, we plan to also support indexes on multiple keys, where - // you could mix ascending sorting (1) with descending sorting indexes (-1) - JSONDocument* description; - std::string name; - }; - - // Open DocumentDB with specified indexes. The list of indexes has to be - // complete, i.e. include all indexes present in the DB, except the primary - // key index. - // Otherwise, Open() will return an error - static Status Open(const DocumentDBOptions& options, const std::string& name, - const std::vector& indexes, - DocumentDB** db, bool read_only = false); - - explicit DocumentDB(DB* db) : StackableDB(db) {} - - // Create a new index. It will stop all writes for the duration of the call. - // All current documents in the DB are scanned and corresponding index entries - // are created - virtual Status CreateIndex(const WriteOptions& write_options, - const IndexDescriptor& index) = 0; - - // Drop an index. Client is responsible to make sure that index is not being - // used by currently executing queries - virtual Status DropIndex(const std::string& name) = 0; - - // Insert a document to the DB. The document needs to have a primary key "_id" - // which can either be a string or an integer. Otherwise the write will fail - // with InvalidArgument. - virtual Status Insert(const WriteOptions& options, - const JSONDocument& document) = 0; - - // Deletes all documents matching a filter atomically - virtual Status Remove(const ReadOptions& read_options, - const WriteOptions& write_options, - const JSONDocument& query) = 0; - - // Does this sequence of operations: - // 1. Find all documents matching a filter - // 2. For all documents, atomically: - // 2.1. apply the update operators - // 2.2. update the secondary indexes - // - // Currently only $set update operator is supported. - // Syntax is: {$set: {key1: value1, key2: value2, etc...}} - // This operator will change a document's key1 field to value1, key2 to - // value2, etc. New values will be set even if a document didn't have an entry - // for the specified key. - // - // You can not change a primary key of a document. - // - // Update example: Update({id: {$gt: 5}, $index: id}, {$set: {enabled: true}}) - virtual Status Update(const ReadOptions& read_options, - const WriteOptions& write_options, - const JSONDocument& filter, - const JSONDocument& updates) = 0; - - // query has to be an array in which every element is an operator. Currently - // only $filter operator is supported. Syntax of $filter operator is: - // {$filter: {key1: condition1, key2: condition2, etc.}} where conditions can - // be either: - // 1) a single value in which case the condition is equality condition, or - // 2) a defined operators, like {$gt: 4}, which will match all documents that - // have key greater than 4. - // - // Supported operators are: - // 1) $gt -- greater than - // 2) $gte -- greater than or equal - // 3) $lt -- less than - // 4) $lte -- less than or equal - // If you want the filter to use an index, you need to specify it like this: - // {$filter: {...(conditions)..., $index: index_name}} - // - // Example query: - // * [{$filter: {name: John, age: {$gte: 18}, $index: age}}] - // will return all Johns whose age is greater or equal to 18 and it will use - // index "age" to satisfy the query. - virtual Cursor* Query(const ReadOptions& read_options, - const JSONDocument& query) = 0; -}; - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/include/rocksdb/utilities/geo_db.h b/include/rocksdb/utilities/geo_db.h deleted file mode 100644 index ec3cbdf26..000000000 --- a/include/rocksdb/utilities/geo_db.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// - -#ifndef ROCKSDB_LITE -#pragma once -#include -#include - -#include "rocksdb/utilities/stackable_db.h" -#include "rocksdb/status.h" - -namespace rocksdb { - -// -// Configurable options needed for setting up a Geo database -// -struct GeoDBOptions { - // Backup info and error messages will be written to info_log - // if non-nullptr. - // Default: nullptr - Logger* info_log; - - explicit GeoDBOptions(Logger* _info_log = nullptr):info_log(_info_log) { } -}; - -// -// A position in the earth's geoid -// -class GeoPosition { - public: - double latitude; - double longitude; - - explicit GeoPosition(double la = 0, double lo = 0) : - latitude(la), longitude(lo) { - } -}; - -// -// Description of an object on the Geoid. It is located by a GPS location, -// and is identified by the id. The value associated with this object is -// an opaque string 'value'. Different objects identified by unique id's -// can have the same gps-location associated with them. -// -class GeoObject { - public: - GeoPosition position; - std::string id; - std::string value; - - GeoObject() {} - - GeoObject(const GeoPosition& pos, const std::string& i, - const std::string& val) : - position(pos), id(i), value(val) { - } -}; - -class GeoIterator { - public: - GeoIterator() = default; - virtual ~GeoIterator() {} - virtual void Next() = 0; - virtual bool Valid() const = 0; - virtual const GeoObject& geo_object() = 0; - virtual Status status() const = 0; -}; - -// -// Stack your DB with GeoDB to be able to get geo-spatial support -// -class GeoDB : public StackableDB { - public: - // GeoDBOptions have to be the same as the ones used in a previous - // incarnation of the DB - // - // GeoDB owns the pointer `DB* db` now. You should not delete it or - // use it after the invocation of GeoDB - // GeoDB(DB* db, const GeoDBOptions& options) : StackableDB(db) {} - GeoDB(DB* db, const GeoDBOptions& /*options*/) : StackableDB(db) {} - virtual ~GeoDB() {} - - // Insert a new object into the location database. The object is - // uniquely identified by the id. If an object with the same id already - // exists in the db, then the old one is overwritten by the new - // object being inserted here. - virtual Status Insert(const GeoObject& object) = 0; - - // Retrieve the value of the object located at the specified GPS - // location and is identified by the 'id'. - virtual Status GetByPosition(const GeoPosition& pos, - const Slice& id, std::string* value) = 0; - - // Retrieve the value of the object identified by the 'id'. This method - // could be potentially slower than GetByPosition - virtual Status GetById(const Slice& id, GeoObject* object) = 0; - - // Delete the specified object - virtual Status Remove(const Slice& id) = 0; - - // Returns an iterator for the items within a circular radius from the - // specified gps location. If 'number_of_values' is specified, - // then the iterator is capped to that number of objects. - // The radius is specified in 'meters'. - virtual GeoIterator* SearchRadial(const GeoPosition& pos, - double radius, - int number_of_values = INT_MAX) = 0; -}; - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/include/rocksdb/utilities/json_document.h b/include/rocksdb/utilities/json_document.h deleted file mode 100644 index 5d841f951..000000000 --- a/include/rocksdb/utilities/json_document.h +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -#pragma once -#ifndef ROCKSDB_LITE - -#include -#include -#include -#include -#include -#include -#include - -#include "rocksdb/slice.h" - -// We use JSONDocument for DocumentDB API -// Implementation inspired by folly::dynamic, rapidjson and fbson - -namespace fbson { - class FbsonValue; - class ObjectVal; - template - class FbsonWriterT; - class FbsonOutStream; - typedef FbsonWriterT FbsonWriter; -} // namespace fbson - -namespace rocksdb { - -// NOTE: none of this is thread-safe -class JSONDocument { - public: - // return nullptr on parse failure - static JSONDocument* ParseJSON(const char* json); - - enum Type { - kNull, - kArray, - kBool, - kDouble, - kInt64, - kObject, - kString, - }; - - /* implicit */ JSONDocument(); // null - /* implicit */ JSONDocument(bool b); - /* implicit */ JSONDocument(double d); - /* implicit */ JSONDocument(int8_t i); - /* implicit */ JSONDocument(int16_t i); - /* implicit */ JSONDocument(int32_t i); - /* implicit */ JSONDocument(int64_t i); - /* implicit */ JSONDocument(const std::string& s); - /* implicit */ JSONDocument(const char* s); - // constructs JSONDocument of specific type with default value - explicit JSONDocument(Type _type); - - JSONDocument(const JSONDocument& json_document); - - JSONDocument(JSONDocument&& json_document); - - Type type() const; - - // REQUIRES: IsObject() - bool Contains(const std::string& key) const; - // REQUIRES: IsObject() - // Returns non-owner object - JSONDocument operator[](const std::string& key) const; - - // REQUIRES: IsArray() == true || IsObject() == true - size_t Count() const; - - // REQUIRES: IsArray() - // Returns non-owner object - JSONDocument operator[](size_t i) const; - - JSONDocument& operator=(JSONDocument jsonDocument); - - bool IsNull() const; - bool IsArray() const; - bool IsBool() const; - bool IsDouble() const; - bool IsInt64() const; - bool IsObject() const; - bool IsString() const; - - // REQUIRES: IsBool() == true - bool GetBool() const; - // REQUIRES: IsDouble() == true - double GetDouble() const; - // REQUIRES: IsInt64() == true - int64_t GetInt64() const; - // REQUIRES: IsString() == true - std::string GetString() const; - - bool operator==(const JSONDocument& rhs) const; - - bool operator!=(const JSONDocument& rhs) const; - - JSONDocument Copy() const; - - bool IsOwner() const; - - std::string DebugString() const; - - private: - class ItemsIteratorGenerator; - - public: - // REQUIRES: IsObject() - ItemsIteratorGenerator Items() const; - - // appends serialized object to dst - void Serialize(std::string* dst) const; - // returns nullptr if Slice doesn't represent valid serialized JSONDocument - static JSONDocument* Deserialize(const Slice& src); - - private: - friend class JSONDocumentBuilder; - - JSONDocument(fbson::FbsonValue* val, bool makeCopy); - - void InitFromValue(const fbson::FbsonValue* val); - - // iteration on objects - class const_item_iterator { - private: - class Impl; - public: - typedef std::pair value_type; - explicit const_item_iterator(Impl* impl); - const_item_iterator(const_item_iterator&&); - const_item_iterator& operator++(); - bool operator!=(const const_item_iterator& other); - value_type operator*(); - ~const_item_iterator(); - private: - friend class ItemsIteratorGenerator; - std::unique_ptr it_; - }; - - class ItemsIteratorGenerator { - public: - explicit ItemsIteratorGenerator(const fbson::ObjectVal& object); - const_item_iterator begin() const; - - const_item_iterator end() const; - - private: - const fbson::ObjectVal& object_; - }; - - std::unique_ptr data_; - mutable fbson::FbsonValue* value_; - - // Our serialization format's first byte specifies the encoding version. That - // way, we can easily change our format while providing backwards - // compatibility. This constant specifies the current version of the - // serialization format - static const char kSerializationFormatVersion; -}; - -class JSONDocumentBuilder { - public: - JSONDocumentBuilder(); - - explicit JSONDocumentBuilder(fbson::FbsonOutStream* out); - - void Reset(); - - bool WriteStartArray(); - - bool WriteEndArray(); - - bool WriteStartObject(); - - bool WriteEndObject(); - - bool WriteKeyValue(const std::string& key, const JSONDocument& value); - - bool WriteJSONDocument(const JSONDocument& value); - - JSONDocument GetJSONDocument(); - - ~JSONDocumentBuilder(); - - private: - std::unique_ptr writer_; -}; - -} // namespace rocksdb - -#endif // ROCKSDB_LITE diff --git a/include/rocksdb/utilities/spatial_db.h b/include/rocksdb/utilities/spatial_db.h deleted file mode 100644 index 477b77cf6..000000000 --- a/include/rocksdb/utilities/spatial_db.h +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#ifndef ROCKSDB_LITE - -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/slice.h" -#include "rocksdb/utilities/stackable_db.h" - -namespace rocksdb { -namespace spatial { - -// NOTE: SpatialDB is experimental and we might change its API without warning. -// Please talk to us before developing against SpatialDB API. -// -// SpatialDB is a support for spatial indexes built on top of RocksDB. -// When creating a new SpatialDB, clients specifies a list of spatial indexes to -// build on their data. Each spatial index is defined by the area and -// granularity. If you're storing map data, different spatial index -// granularities can be used for different zoom levels. -// -// Each element inserted into SpatialDB has: -// * a bounding box, which determines how will the element be indexed -// * string blob, which will usually be WKB representation of the polygon -// (http://en.wikipedia.org/wiki/Well-known_text) -// * feature set, which is a map of key-value pairs, where value can be null, -// int, double, bool, string -// * a list of indexes to insert the element in -// -// Each query is executed on a single spatial index. Query guarantees that it -// will return all elements intersecting the specified bounding box, but it -// might also return some extra non-intersecting elements. - -// Variant is a class that can be many things: null, bool, int, double or string -// It is used to store different value types in FeatureSet (see below) -struct Variant { - // Don't change the values here, they are persisted on disk - enum Type { - kNull = 0x0, - kBool = 0x1, - kInt = 0x2, - kDouble = 0x3, - kString = 0x4, - }; - - Variant() : type_(kNull) {} - /* implicit */ Variant(bool b) : type_(kBool) { data_.b = b; } - /* implicit */ Variant(uint64_t i) : type_(kInt) { data_.i = i; } - /* implicit */ Variant(double d) : type_(kDouble) { data_.d = d; } - /* implicit */ Variant(const std::string& s) : type_(kString) { - new (&data_.s) std::string(s); - } - - Variant(const Variant& v) : type_(v.type_) { Init(v, data_); } - - Variant& operator=(const Variant& v); - - Variant(Variant&& rhs) : type_(kNull) { *this = std::move(rhs); } - - Variant& operator=(Variant&& v); - - ~Variant() { Destroy(type_, data_); } - - Type type() const { return type_; } - bool get_bool() const { return data_.b; } - uint64_t get_int() const { return data_.i; } - double get_double() const { return data_.d; } - const std::string& get_string() const { return *GetStringPtr(data_); } - - bool operator==(const Variant& other) const; - bool operator!=(const Variant& other) const { return !(*this == other); } - - private: - Type type_; - - union Data { - bool b; - uint64_t i; - double d; - // Current version of MS compiler not C++11 compliant so can not put - // std::string - // however, even then we still need the rest of the maintenance. - char s[sizeof(std::string)]; - } data_; - - // Avoid type_punned aliasing problem - static std::string* GetStringPtr(Data& d) { - void* p = d.s; - return reinterpret_cast(p); - } - - static const std::string* GetStringPtr(const Data& d) { - const void* p = d.s; - return reinterpret_cast(p); - } - - static void Init(const Variant&, Data&); - - static void Destroy(Type t, Data& d) { - if (t == kString) { - using std::string; - GetStringPtr(d)->~string(); - } - } -}; - -// FeatureSet is a map of key-value pairs. One feature set is associated with -// each element in SpatialDB. It can be used to add rich data about the element. -class FeatureSet { - private: - typedef std::unordered_map map; - - public: - class iterator { - public: - /* implicit */ iterator(const map::const_iterator itr) : itr_(itr) {} - iterator& operator++() { - ++itr_; - return *this; - } - bool operator!=(const iterator& other) { return itr_ != other.itr_; } - bool operator==(const iterator& other) { return itr_ == other.itr_; } - map::value_type operator*() { return *itr_; } - - private: - map::const_iterator itr_; - }; - FeatureSet() = default; - - FeatureSet* Set(const std::string& key, const Variant& value); - bool Contains(const std::string& key) const; - // REQUIRES: Contains(key) - const Variant& Get(const std::string& key) const; - iterator Find(const std::string& key) const; - - iterator begin() const { return map_.begin(); } - iterator end() const { return map_.end(); } - - void Clear(); - size_t Size() const { return map_.size(); } - - void Serialize(std::string* output) const; - // REQUIRED: empty FeatureSet - bool Deserialize(const Slice& input); - - std::string DebugString() const; - - private: - map map_; -}; - -// BoundingBox is a helper structure for defining rectangles representing -// bounding boxes of spatial elements. -template -struct BoundingBox { - T min_x, min_y, max_x, max_y; - BoundingBox() = default; - BoundingBox(T _min_x, T _min_y, T _max_x, T _max_y) - : min_x(_min_x), min_y(_min_y), max_x(_max_x), max_y(_max_y) {} - - bool Intersects(const BoundingBox& a) const { - return !(min_x > a.max_x || min_y > a.max_y || a.min_x > max_x || - a.min_y > max_y); - } -}; - -struct SpatialDBOptions { - uint64_t cache_size = 1 * 1024 * 1024 * 1024LL; // 1GB - int num_threads = 16; - bool bulk_load = true; -}; - -// Cursor is used to return data from the query to the client. To get all the -// data from the query, just call Next() while Valid() is true -class Cursor { - public: - Cursor() = default; - virtual ~Cursor() {} - - virtual bool Valid() const = 0; - // REQUIRES: Valid() - virtual void Next() = 0; - - // Lifetime of the underlying storage until the next call to Next() - // REQUIRES: Valid() - virtual const Slice blob() = 0; - // Lifetime of the underlying storage until the next call to Next() - // REQUIRES: Valid() - virtual const FeatureSet& feature_set() = 0; - - virtual Status status() const = 0; - - private: - // No copying allowed - Cursor(const Cursor&); - void operator=(const Cursor&); -}; - -// SpatialIndexOptions defines a spatial index that will be built on the data -struct SpatialIndexOptions { - // Spatial indexes are referenced by names - std::string name; - // An area that is indexed. If the element is not intersecting with spatial - // index's bbox, it will not be inserted into the index - BoundingBox bbox; - // tile_bits control the granularity of the spatial index. Each dimension of - // the bbox will be split into (1 << tile_bits) tiles, so there will be a - // total of (1 << tile_bits)^2 tiles. It is recommended to configure a size of - // each tile to be approximately the size of the query on that spatial index - uint32_t tile_bits; - SpatialIndexOptions() {} - SpatialIndexOptions(const std::string& _name, - const BoundingBox& _bbox, uint32_t _tile_bits) - : name(_name), bbox(_bbox), tile_bits(_tile_bits) {} -}; - -class SpatialDB : public StackableDB { - public: - // Creates the SpatialDB with specified list of indexes. - // REQUIRED: db doesn't exist - static Status Create(const SpatialDBOptions& options, const std::string& name, - const std::vector& spatial_indexes); - - // Open the existing SpatialDB. The resulting db object will be returned - // through db parameter. - // REQUIRED: db was created using SpatialDB::Create - static Status Open(const SpatialDBOptions& options, const std::string& name, - SpatialDB** db, bool read_only = false); - - explicit SpatialDB(DB* db) : StackableDB(db) {} - - // Insert the element into the DB. Element will be inserted into specified - // spatial_indexes, based on specified bbox. - // REQUIRES: spatial_indexes.size() > 0 - virtual Status Insert(const WriteOptions& write_options, - const BoundingBox& bbox, const Slice& blob, - const FeatureSet& feature_set, - const std::vector& spatial_indexes) = 0; - - // Calling Compact() after inserting a bunch of elements should speed up - // reading. This is especially useful if you use SpatialDBOptions::bulk_load - // Num threads determines how many threads we'll use for compactions. Setting - // this to bigger number will use more IO and CPU, but finish faster - virtual Status Compact(int num_threads = 1) = 0; - - // Query the specified spatial_index. Query will return all elements that - // intersect bbox, but it may also return some extra elements. - virtual Cursor* Query(const ReadOptions& read_options, - const BoundingBox& bbox, - const std::string& spatial_index) = 0; -}; - -} // namespace spatial -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/src.mk b/src.mk index 8b3ab68d8..5cc599fda 100644 --- a/src.mk +++ b/src.mk @@ -177,14 +177,9 @@ LIB_SOURCES = \ utilities/checkpoint/checkpoint_impl.cc \ utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc \ utilities/convenience/info_log_finder.cc \ - utilities/date_tiered/date_tiered_db_impl.cc \ utilities/debug.cc \ - utilities/document/document_db.cc \ - utilities/document/json_document.cc \ - utilities/document/json_document_builder.cc \ utilities/env_mirror.cc \ utilities/env_timed.cc \ - utilities/geodb/geodb_impl.cc \ utilities/leveldb_options/leveldb_options.cc \ utilities/lua/rocks_lua_compaction_filter.cc \ utilities/memory/memory_util.cc \ @@ -201,9 +196,7 @@ LIB_SOURCES = \ utilities/persistent_cache/block_cache_tier_metadata.cc \ utilities/persistent_cache/persistent_cache_tier.cc \ utilities/persistent_cache/volatile_tier_impl.cc \ - utilities/redis/redis_lists.cc \ utilities/simulator_cache/sim_cache.cc \ - utilities/spatialdb/spatial_db.cc \ utilities/table_properties_collectors/compact_on_deletion_collector.cc \ utilities/trace/file_trace_reader_writer.cc \ utilities/transactions/optimistic_transaction.cc \ @@ -249,11 +242,6 @@ MOCK_LIB_SOURCES = \ BENCH_LIB_SOURCES = \ tools/db_bench_tool.cc \ -EXP_LIB_SOURCES = \ - utilities/col_buf_decoder.cc \ - utilities/col_buf_encoder.cc \ - utilities/column_aware_encoding_util.cc - TEST_LIB_SOURCES = \ db/db_test_util.cc \ util/testharness.cc \ @@ -330,7 +318,6 @@ MAIN_SOURCES = \ db/persistent_cache_test.cc \ db/plain_table_db_test.cc \ db/prefix_test.cc \ - db/redis_test.cc \ db/repair_test.cc \ db/range_del_aggregator_test.cc \ db/range_del_aggregator_bench.cc \ @@ -397,21 +384,13 @@ MAIN_SOURCES = \ utilities/cassandra/cassandra_row_merge_test.cc \ utilities/cassandra/cassandra_serialize_test.cc \ utilities/checkpoint/checkpoint_test.cc \ - utilities/column_aware_encoding_exp.cc \ - utilities/column_aware_encoding_test.cc \ - utilities/date_tiered/date_tiered_test.cc \ - utilities/document/document_db_test.cc \ - utilities/document/json_document_test.cc \ - utilities/geodb/geodb_test.cc \ utilities/lua/rocks_lua_test.cc \ utilities/memory/memory_test.cc \ utilities/merge_operators/string_append/stringappend_test.cc \ utilities/object_registry_test.cc \ utilities/option_change_migration/option_change_migration_test.cc \ utilities/options/options_util_test.cc \ - utilities/redis/redis_lists_test.cc \ utilities/simulator_cache/sim_cache_test.cc \ - utilities/spatialdb/spatial_db_test.cc \ utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \ utilities/transactions/optimistic_transaction_test.cc \ utilities/transactions/transaction_test.cc \ diff --git a/utilities/col_buf_decoder.cc b/utilities/col_buf_decoder.cc deleted file mode 100644 index 8f9fa74ab..000000000 --- a/utilities/col_buf_decoder.cc +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "utilities/col_buf_decoder.h" -#include -#include -#include "port/port.h" - -namespace rocksdb { - -ColBufDecoder::~ColBufDecoder() {} - -namespace { - -inline uint64_t EncodeFixed64WithEndian(uint64_t val, bool big_endian, - size_t size) { - if (big_endian && port::kLittleEndian) { - val = EndianTransform(val, size); - } else if (!big_endian && !port::kLittleEndian) { - val = EndianTransform(val, size); - } - return val; -} - -} // namespace - -ColBufDecoder* ColBufDecoder::NewColBufDecoder( - const ColDeclaration& col_declaration) { - if (col_declaration.col_type == "FixedLength") { - return new FixedLengthColBufDecoder( - col_declaration.size, col_declaration.col_compression_type, - col_declaration.nullable, col_declaration.big_endian); - } else if (col_declaration.col_type == "VariableLength") { - return new VariableLengthColBufDecoder(); - } else if (col_declaration.col_type == "VariableChunk") { - return new VariableChunkColBufDecoder(col_declaration.col_compression_type); - } else if (col_declaration.col_type == "LongFixedLength") { - return new LongFixedLengthColBufDecoder(col_declaration.size, - col_declaration.nullable); - } - // Unrecognized column type - return nullptr; -} - -namespace { - -void ReadVarint64(const char** src_ptr, uint64_t* val_ptr) { - const char* q = GetVarint64Ptr(*src_ptr, *src_ptr + 10, val_ptr); - assert(q != nullptr); - *src_ptr = q; -} -} // namespace - -size_t FixedLengthColBufDecoder::Init(const char* src) { - remain_runs_ = 0; - last_val_ = 0; - // Dictionary initialization - dict_vec_.clear(); - const char* orig_src = src; - if (col_compression_type_ == kColDict || - col_compression_type_ == kColRleDict) { - const char* q; - uint64_t dict_size; - // Bypass limit - q = GetVarint64Ptr(src, src + 10, &dict_size); - assert(q != nullptr); - src = q; - - uint64_t dict_key; - for (uint64_t i = 0; i < dict_size; ++i) { - // Bypass limit - ReadVarint64(&src, &dict_key); - - dict_key = EncodeFixed64WithEndian(dict_key, big_endian_, size_); - dict_vec_.push_back(dict_key); - } - } - return src - orig_src; -} - -size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) { - uint64_t read_val = 0; - const char* orig_src = src; - const char* src_limit = src + 20; - if (nullable_) { - bool not_null; - not_null = *src; - src += 1; - if (!not_null) { - return 1; - } - } - if (IsRunLength(col_compression_type_)) { - if (remain_runs_ == 0) { - const char* q; - run_val_ = 0; - if (col_compression_type_ == kColRle) { - memcpy(&run_val_, src, size_); - src += size_; - } else { - q = GetVarint64Ptr(src, src_limit, &run_val_); - assert(q != nullptr); - src = q; - } - - q = GetVarint64Ptr(src, src_limit, &remain_runs_); - assert(q != nullptr); - src = q; - - if (col_compression_type_ != kColRleDeltaVarint && - col_compression_type_ != kColRleDict) { - run_val_ = EncodeFixed64WithEndian(run_val_, big_endian_, size_); - } - } - read_val = run_val_; - } else { - if (col_compression_type_ == kColNoCompression) { - memcpy(&read_val, src, size_); - src += size_; - } else { - // Assume a column does not exceed 8 bytes here - const char* q = GetVarint64Ptr(src, src_limit, &read_val); - assert(q != nullptr); - src = q; - } - if (col_compression_type_ != kColDeltaVarint && - col_compression_type_ != kColDict) { - read_val = EncodeFixed64WithEndian(read_val, big_endian_, size_); - } - } - - uint64_t write_val = read_val; - if (col_compression_type_ == kColDeltaVarint || - col_compression_type_ == kColRleDeltaVarint) { - // does not support 64 bit - - uint64_t mask = (write_val & 1) ? (~uint64_t(0)) : 0; - int64_t delta = (write_val >> 1) ^ mask; - write_val = last_val_ + delta; - - uint64_t tmp = write_val; - write_val = EncodeFixed64WithEndian(write_val, big_endian_, size_); - last_val_ = tmp; - } else if (col_compression_type_ == kColRleDict || - col_compression_type_ == kColDict) { - uint64_t dict_val = read_val; - assert(dict_val < dict_vec_.size()); - write_val = dict_vec_[static_cast(dict_val)]; - } - - // dest->append(reinterpret_cast(&write_val), size_); - memcpy(*dest, reinterpret_cast(&write_val), size_); - *dest += size_; - if (IsRunLength(col_compression_type_)) { - --remain_runs_; - } - return src - orig_src; -} - -size_t LongFixedLengthColBufDecoder::Decode(const char* src, char** dest) { - if (nullable_) { - bool not_null; - not_null = *src; - src += 1; - if (!not_null) { - return 1; - } - } - memcpy(*dest, src, size_); - *dest += size_; - return size_ + 1; -} - -size_t VariableLengthColBufDecoder::Decode(const char* src, char** dest) { - uint8_t len; - len = *src; - memcpy(dest, reinterpret_cast(&len), 1); - *dest += 1; - src += 1; - memcpy(*dest, src, len); - *dest += len; - return len + 1; -} - -size_t VariableChunkColBufDecoder::Init(const char* src) { - // Dictionary initialization - dict_vec_.clear(); - const char* orig_src = src; - if (col_compression_type_ == kColDict) { - const char* q; - uint64_t dict_size; - // Bypass limit - q = GetVarint64Ptr(src, src + 10, &dict_size); - assert(q != nullptr); - src = q; - - uint64_t dict_key; - for (uint64_t i = 0; i < dict_size; ++i) { - // Bypass limit - ReadVarint64(&src, &dict_key); - dict_vec_.push_back(dict_key); - } - } - return src - orig_src; -} - -size_t VariableChunkColBufDecoder::Decode(const char* src, char** dest) { - const char* orig_src = src; - uint64_t size = 0; - ReadVarint64(&src, &size); - int64_t full_chunks = size / 8; - uint64_t chunk_buf; - size_t chunk_size = 8; - for (int64_t i = 0; i < full_chunks + 1; ++i) { - chunk_buf = 0; - if (i == full_chunks) { - chunk_size = size % 8; - } - if (col_compression_type_ == kColDict) { - uint64_t dict_val; - ReadVarint64(&src, &dict_val); - assert(dict_val < dict_vec_.size()); - chunk_buf = dict_vec_[static_cast(dict_val)]; - } else { - memcpy(&chunk_buf, src, chunk_size); - src += chunk_size; - } - memcpy(*dest, reinterpret_cast(&chunk_buf), 8); - *dest += 8; - uint8_t mask = ((0xFF - 8) + chunk_size) & 0xFF; - memcpy(*dest, reinterpret_cast(&mask), 1); - *dest += 1; - } - - return src - orig_src; -} - -} // namespace rocksdb diff --git a/utilities/col_buf_decoder.h b/utilities/col_buf_decoder.h deleted file mode 100644 index cea952637..000000000 --- a/utilities/col_buf_decoder.h +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#include -#include -#include -#include -#include -#include -#include "util/coding.h" -#include "utilities/col_buf_encoder.h" - -namespace rocksdb { - -struct ColDeclaration; - -// ColBufDecoder is a class to decode column buffers. It can be populated from a -// ColDeclaration. Before starting decoding, a Init() method should be called. -// Each time it takes a column value into Decode() method. -class ColBufDecoder { - public: - virtual ~ColBufDecoder() = 0; - virtual size_t Init(const char* /*src*/) { return 0; } - virtual size_t Decode(const char* src, char** dest) = 0; - static ColBufDecoder* NewColBufDecoder(const ColDeclaration& col_declaration); - - protected: - std::string buffer_; - static inline bool IsRunLength(ColCompressionType type) { - return type == kColRle || type == kColRleVarint || - type == kColRleDeltaVarint || type == kColRleDict; - } -}; - -class FixedLengthColBufDecoder : public ColBufDecoder { - public: - explicit FixedLengthColBufDecoder( - size_t size, ColCompressionType col_compression_type = kColNoCompression, - bool nullable = false, bool big_endian = false) - : size_(size), - col_compression_type_(col_compression_type), - nullable_(nullable), - big_endian_(big_endian), - remain_runs_(0), - run_val_(0), - last_val_(0) {} - - size_t Init(const char* src) override; - size_t Decode(const char* src, char** dest) override; - ~FixedLengthColBufDecoder() {} - - private: - size_t size_; - ColCompressionType col_compression_type_; - bool nullable_; - bool big_endian_; - - // for decoding - std::vector dict_vec_; - uint64_t remain_runs_; - uint64_t run_val_; - uint64_t last_val_; -}; - -class LongFixedLengthColBufDecoder : public ColBufDecoder { - public: - LongFixedLengthColBufDecoder(size_t size, bool nullable) - : size_(size), nullable_(nullable) {} - - size_t Decode(const char* src, char** dest) override; - ~LongFixedLengthColBufDecoder() {} - - private: - size_t size_; - bool nullable_; -}; - -class VariableLengthColBufDecoder : public ColBufDecoder { - public: - size_t Decode(const char* src, char** dest) override; - ~VariableLengthColBufDecoder() {} -}; - -class VariableChunkColBufDecoder : public VariableLengthColBufDecoder { - public: - size_t Init(const char* src) override; - size_t Decode(const char* src, char** dest) override; - explicit VariableChunkColBufDecoder(ColCompressionType col_compression_type) - : col_compression_type_(col_compression_type) {} - VariableChunkColBufDecoder() : col_compression_type_(kColNoCompression) {} - - private: - ColCompressionType col_compression_type_; - std::unordered_map dictionary_; - std::vector dict_vec_; -}; - -struct KVPairColBufDecoders { - std::vector> key_col_bufs; - std::vector> value_col_bufs; - std::unique_ptr value_checksum_buf; - - explicit KVPairColBufDecoders(const KVPairColDeclarations& kvp_cd) { - for (auto kcd : *kvp_cd.key_col_declarations) { - key_col_bufs.emplace_back( - std::move(ColBufDecoder::NewColBufDecoder(kcd))); - } - for (auto vcd : *kvp_cd.value_col_declarations) { - value_col_bufs.emplace_back( - std::move(ColBufDecoder::NewColBufDecoder(vcd))); - } - value_checksum_buf.reset( - ColBufDecoder::NewColBufDecoder(*kvp_cd.value_checksum_declaration)); - } -}; -} // namespace rocksdb diff --git a/utilities/col_buf_encoder.cc b/utilities/col_buf_encoder.cc deleted file mode 100644 index f8b19e8c7..000000000 --- a/utilities/col_buf_encoder.cc +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "utilities/col_buf_encoder.h" -#include -#include -#include "port/port.h" - -namespace rocksdb { - -ColBufEncoder::~ColBufEncoder() {} - -namespace { - -inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian, - size_t size) { - if (big_endian && port::kLittleEndian) { - val = EndianTransform(val, size); - } else if (!big_endian && !port::kLittleEndian) { - val = EndianTransform(val, size); - } - return val; -} - -} // namespace - -const std::string &ColBufEncoder::GetData() { return buffer_; } - -ColBufEncoder *ColBufEncoder::NewColBufEncoder( - const ColDeclaration &col_declaration) { - if (col_declaration.col_type == "FixedLength") { - return new FixedLengthColBufEncoder( - col_declaration.size, col_declaration.col_compression_type, - col_declaration.nullable, col_declaration.big_endian); - } else if (col_declaration.col_type == "VariableLength") { - return new VariableLengthColBufEncoder(); - } else if (col_declaration.col_type == "VariableChunk") { - return new VariableChunkColBufEncoder(col_declaration.col_compression_type); - } else if (col_declaration.col_type == "LongFixedLength") { - return new LongFixedLengthColBufEncoder(col_declaration.size, - col_declaration.nullable); - } - // Unrecognized column type - return nullptr; -} - -size_t FixedLengthColBufEncoder::Append(const char *buf) { - if (nullable_) { - if (buf == nullptr) { - buffer_.append(1, 0); - return 0; - } else { - buffer_.append(1, 1); - } - } - uint64_t read_val = 0; - memcpy(&read_val, buf, size_); - read_val = DecodeFixed64WithEndian(read_val, big_endian_, size_); - - // Determine write value - uint64_t write_val = read_val; - if (col_compression_type_ == kColDeltaVarint || - col_compression_type_ == kColRleDeltaVarint) { - int64_t delta = read_val - last_val_; - // Encode signed delta value - delta = (static_cast(delta) << 1) ^ (delta >> 63); - write_val = delta; - last_val_ = read_val; - } else if (col_compression_type_ == kColDict || - col_compression_type_ == kColRleDict) { - auto iter = dictionary_.find(read_val); - uint64_t dict_val; - if (iter == dictionary_.end()) { - // Add new entry to dictionary - dict_val = dictionary_.size(); - dictionary_.insert(std::make_pair(read_val, dict_val)); - dict_vec_.push_back(read_val); - } else { - dict_val = iter->second; - } - write_val = dict_val; - } - - // Write into buffer - if (IsRunLength(col_compression_type_)) { - if (run_length_ == -1) { - // First element - run_val_ = write_val; - run_length_ = 1; - } else if (write_val != run_val_) { - // End of run - // Write run value - if (col_compression_type_ == kColRle) { - buffer_.append(reinterpret_cast(&run_val_), size_); - } else { - PutVarint64(&buffer_, run_val_); - } - // Write run length - PutVarint64(&buffer_, run_length_); - run_val_ = write_val; - run_length_ = 1; - } else { - run_length_++; - } - } else { // non run-length encodings - if (col_compression_type_ == kColNoCompression) { - buffer_.append(reinterpret_cast(&write_val), size_); - } else { - PutVarint64(&buffer_, write_val); - } - } - return size_; -} - -void FixedLengthColBufEncoder::Finish() { - if (col_compression_type_ == kColDict || - col_compression_type_ == kColRleDict) { - std::string header; - PutVarint64(&header, dict_vec_.size()); - // Put dictionary in the header - for (auto item : dict_vec_) { - PutVarint64(&header, item); - } - buffer_ = header + buffer_; - } - if (IsRunLength(col_compression_type_)) { - // Finish last run value - if (col_compression_type_ == kColRle) { - buffer_.append(reinterpret_cast(&run_val_), size_); - } else { - PutVarint64(&buffer_, run_val_); - } - PutVarint64(&buffer_, run_length_); - } -} - -size_t LongFixedLengthColBufEncoder::Append(const char *buf) { - if (nullable_) { - if (buf == nullptr) { - buffer_.append(1, 0); - return 0; - } else { - buffer_.append(1, 1); - } - } - buffer_.append(buf, size_); - return size_; -} - -void LongFixedLengthColBufEncoder::Finish() {} - -size_t VariableLengthColBufEncoder::Append(const char *buf) { - uint8_t length = 0; - length = *buf; - buffer_.append(buf, 1); - buf += 1; - buffer_.append(buf, length); - return length + 1; -} - -void VariableLengthColBufEncoder::Finish() {} - -size_t VariableChunkColBufEncoder::Append(const char *buf) { - const char *orig_buf = buf; - uint8_t mark = 0xFF; - size_t length = 0; - std::string tmp_buffer; - while (mark == 0xFF) { - uint64_t val; - memcpy(&val, buf, 8); - buf += 8; - mark = *buf; - buf += 1; - int8_t chunk_size = 8 - (0xFF - mark); - if (col_compression_type_ == kColDict) { - auto iter = dictionary_.find(val); - uint64_t dict_val; - if (iter == dictionary_.end()) { - dict_val = dictionary_.size(); - dictionary_.insert(std::make_pair(val, dict_val)); - dict_vec_.push_back(val); - } else { - dict_val = iter->second; - } - PutVarint64(&tmp_buffer, dict_val); - } else { - tmp_buffer.append(reinterpret_cast(&val), chunk_size); - } - length += chunk_size; - } - - PutVarint64(&buffer_, length); - buffer_.append(tmp_buffer); - return buf - orig_buf; -} - -void VariableChunkColBufEncoder::Finish() { - if (col_compression_type_ == kColDict) { - std::string header; - PutVarint64(&header, dict_vec_.size()); - for (auto item : dict_vec_) { - PutVarint64(&header, item); - } - buffer_ = header + buffer_; - } -} - -} // namespace rocksdb diff --git a/utilities/col_buf_encoder.h b/utilities/col_buf_encoder.h deleted file mode 100644 index 902879925..000000000 --- a/utilities/col_buf_encoder.h +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#include -#include -#include -#include -#include -#include -#include "util/coding.h" - -namespace rocksdb { - -enum ColCompressionType { - kColNoCompression, - kColRle, - kColVarint, - kColRleVarint, - kColDeltaVarint, - kColRleDeltaVarint, - kColDict, - kColRleDict -}; - -struct ColDeclaration; - -// ColBufEncoder is a class to encode column buffers. It can be populated from a -// ColDeclaration. Each time it takes a column value into Append() method to -// encode the column and store it into an internal buffer. After all rows for -// this column are consumed, a Finish() should be called to add header and -// remaining data. -class ColBufEncoder { - public: - // Read a column, encode data and append into internal buffer. - virtual size_t Append(const char *buf) = 0; - virtual ~ColBufEncoder() = 0; - // Get the internal column buffer. Should only be called after Finish(). - const std::string &GetData(); - // Finish encoding. Add header and remaining data. - virtual void Finish() = 0; - // Populate a ColBufEncoder from ColDeclaration. - static ColBufEncoder *NewColBufEncoder(const ColDeclaration &col_declaration); - - protected: - std::string buffer_; - static inline bool IsRunLength(ColCompressionType type) { - return type == kColRle || type == kColRleVarint || - type == kColRleDeltaVarint || type == kColRleDict; - } -}; - -// Encoder for fixed length column buffer. In fixed length column buffer, the -// size of the column should not exceed 8 bytes. -// The following encodings are supported: -// Varint: Variable length integer. See util/coding.h for more details -// Rle (Run length encoding): encode a sequence of contiguous value as -// [run_value][run_length]. Can be combined with Varint -// Delta: Encode value to its delta with its adjacent entry. Use varint to -// possibly reduce stored bytes. Can be combined with Rle. -// Dictionary: Use a dictionary to record all possible values in the block and -// encode them with an ID started from 0. IDs are encoded as varint. A column -// with dictionary encoding will have a header to store all actual values, -// ordered by their dictionary value, and the data will be replaced by -// dictionary value. Can be combined with Rle. -class FixedLengthColBufEncoder : public ColBufEncoder { - public: - explicit FixedLengthColBufEncoder( - size_t size, ColCompressionType col_compression_type = kColNoCompression, - bool nullable = false, bool big_endian = false) - : size_(size), - col_compression_type_(col_compression_type), - nullable_(nullable), - big_endian_(big_endian), - last_val_(0), - run_length_(-1), - run_val_(0) {} - - size_t Append(const char *buf) override; - void Finish() override; - ~FixedLengthColBufEncoder() {} - - private: - size_t size_; - ColCompressionType col_compression_type_; - // If set as true, the input value can be null (represented as nullptr). When - // nullable is true, use one more byte before actual value to indicate if the - // current value is null. - bool nullable_; - // If set as true, input value will be treated as big endian encoded. - bool big_endian_; - - // for encoding - uint64_t last_val_; - int16_t run_length_; - uint64_t run_val_; - // Map to store dictionary for dictionary encoding - std::unordered_map dictionary_; - // Vector of dictionary keys. - std::vector dict_vec_; -}; - -// Long fixed length column buffer is a variant of fixed length buffer to hold -// fixed length buffer with more than 8 bytes. We do not support any special -// encoding schemes in LongFixedLengthColBufEncoder. -class LongFixedLengthColBufEncoder : public ColBufEncoder { - public: - LongFixedLengthColBufEncoder(size_t size, bool nullable) - : size_(size), nullable_(nullable) {} - size_t Append(const char *buf) override; - void Finish() override; - - ~LongFixedLengthColBufEncoder() {} - - private: - size_t size_; - bool nullable_; -}; - -// Variable length column buffer holds a format of variable length column. In -// this format, a column is composed of one byte length k, followed by data with -// k bytes long data. -class VariableLengthColBufEncoder : public ColBufEncoder { - public: - size_t Append(const char *buf) override; - void Finish() override; - - ~VariableLengthColBufEncoder() {} -}; - -// Variable chunk column buffer holds another format of variable length column. -// In this format, a column contains multiple chunks of data, each of which is -// composed of 8 bytes long data, and one byte as a mask to indicate whether we -// have more data to come. If no more data coming, the mask is set as 0xFF. If -// the chunk is the last chunk and has only k valid bytes, the mask is set as -// 0xFF - (8 - k). -class VariableChunkColBufEncoder : public VariableLengthColBufEncoder { - public: - size_t Append(const char *buf) override; - void Finish() override; - explicit VariableChunkColBufEncoder(ColCompressionType col_compression_type) - : col_compression_type_(col_compression_type) {} - VariableChunkColBufEncoder() : col_compression_type_(kColNoCompression) {} - - private: - ColCompressionType col_compression_type_; - // Map to store dictionary for dictionary encoding - std::unordered_map dictionary_; - // Vector of dictionary keys. - std::vector dict_vec_; -}; - -// ColDeclaration declares a column's type, algorithm of column-aware encoding, -// and other column data like endian and nullability. -struct ColDeclaration { - explicit ColDeclaration( - std::string _col_type, - ColCompressionType _col_compression_type = kColNoCompression, - size_t _size = 0, bool _nullable = false, bool _big_endian = false) - : col_type(_col_type), - col_compression_type(_col_compression_type), - size(_size), - nullable(_nullable), - big_endian(_big_endian) {} - std::string col_type; - ColCompressionType col_compression_type; - size_t size; - bool nullable; - bool big_endian; -}; - -// KVPairColDeclarations is a class to hold column declaration of columns in -// key and value. -struct KVPairColDeclarations { - std::vector *key_col_declarations; - std::vector *value_col_declarations; - ColDeclaration *value_checksum_declaration; - KVPairColDeclarations(std::vector *_key_col_declarations, - std::vector *_value_col_declarations, - ColDeclaration *_value_checksum_declaration) - : key_col_declarations(_key_col_declarations), - value_col_declarations(_value_col_declarations), - value_checksum_declaration(_value_checksum_declaration) {} -}; - -// Similar to KVPairDeclarations, KVPairColBufEncoders is used to hold column -// buffer encoders of all columns in key and value. -struct KVPairColBufEncoders { - std::vector> key_col_bufs; - std::vector> value_col_bufs; - std::unique_ptr value_checksum_buf; - - explicit KVPairColBufEncoders(const KVPairColDeclarations &kvp_cd) { - for (auto kcd : *kvp_cd.key_col_declarations) { - key_col_bufs.emplace_back( - std::move(ColBufEncoder::NewColBufEncoder(kcd))); - } - for (auto vcd : *kvp_cd.value_col_declarations) { - value_col_bufs.emplace_back( - std::move(ColBufEncoder::NewColBufEncoder(vcd))); - } - value_checksum_buf.reset( - ColBufEncoder::NewColBufEncoder(*kvp_cd.value_checksum_declaration)); - } - - // Helper function to call Finish() - void Finish() { - for (auto &col_buf : key_col_bufs) { - col_buf->Finish(); - } - for (auto &col_buf : value_col_bufs) { - col_buf->Finish(); - } - value_checksum_buf->Finish(); - } -}; -} // namespace rocksdb diff --git a/utilities/column_aware_encoding_exp.cc b/utilities/column_aware_encoding_exp.cc deleted file mode 100644 index c251c985e..000000000 --- a/utilities/column_aware_encoding_exp.cc +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif - -#include -#include - -#ifndef ROCKSDB_LITE -#ifdef GFLAGS - -#include -#include -#include "rocksdb/env.h" -#include "rocksdb/options.h" -#include "table/block_based_table_builder.h" -#include "table/block_based_table_reader.h" -#include "table/format.h" -#include "tools/sst_dump_tool_imp.h" -#include "util/compression.h" -#include "util/gflags_compat.h" -#include "util/stop_watch.h" -#include "utilities/col_buf_encoder.h" -#include "utilities/column_aware_encoding_util.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; -DEFINE_string(encoded_file, "", "file to store encoded data blocks"); -DEFINE_string(decoded_file, "", - "file to store decoded data blocks after encoding"); -DEFINE_string(format, "col", "Output Format. Can be 'row' or 'col'"); -// TODO(jhli): option `col` should be removed and replaced by general -// column specifications. -DEFINE_string(index_type, "col", "Index type. Can be 'primary' or 'secondary'"); -DEFINE_string(dump_file, "", - "Dump data blocks separated by columns in human-readable format"); -DEFINE_bool(decode, false, "Deocde blocks after they are encoded"); -DEFINE_bool(stat, false, - "Print column distribution statistics. Cannot decode in this mode"); -DEFINE_string(compression_type, "kNoCompression", - "The compression algorithm used to compress data blocks"); - -namespace rocksdb { - -class ColumnAwareEncodingExp { - public: - static void Run(const std::string& sst_file) { - bool decode = FLAGS_decode; - if (FLAGS_decoded_file.size() > 0) { - decode = true; - } - if (FLAGS_stat) { - decode = false; - } - - ColumnAwareEncodingReader reader(sst_file); - std::vector* key_col_declarations; - std::vector* value_col_declarations; - ColDeclaration* value_checksum_declaration; - if (FLAGS_index_type == "primary") { - ColumnAwareEncodingReader::GetColDeclarationsPrimary( - &key_col_declarations, &value_col_declarations, - &value_checksum_declaration); - } else { - ColumnAwareEncodingReader::GetColDeclarationsSecondary( - &key_col_declarations, &value_col_declarations, - &value_checksum_declaration); - } - KVPairColDeclarations kvp_cd(key_col_declarations, value_col_declarations, - value_checksum_declaration); - - if (!FLAGS_dump_file.empty()) { - std::vector kv_pair_blocks; - reader.GetKVPairsFromDataBlocks(&kv_pair_blocks); - reader.DumpDataColumns(FLAGS_dump_file, kvp_cd, kv_pair_blocks); - return; - } - std::unordered_map compressions = { - {"kNoCompression", CompressionType::kNoCompression}, - {"kZlibCompression", CompressionType::kZlibCompression}, - {"kZSTD", CompressionType::kZSTD}}; - - // Find Compression - CompressionType compression_type = compressions[FLAGS_compression_type]; - EnvOptions env_options; - if (CompressionTypeSupported(compression_type)) { - fprintf(stdout, "[%s]\n", FLAGS_compression_type.c_str()); - std::unique_ptr encoded_out_file; - - std::unique_ptr env(NewMemEnv(Env::Default())); - if (!FLAGS_encoded_file.empty()) { - env->NewWritableFile(FLAGS_encoded_file, &encoded_out_file, - env_options); - } - - std::vector kv_pair_blocks; - reader.GetKVPairsFromDataBlocks(&kv_pair_blocks); - - std::vector encoded_blocks; - StopWatchNano sw(env.get(), true); - if (FLAGS_format == "col") { - reader.EncodeBlocks(kvp_cd, encoded_out_file.get(), compression_type, - kv_pair_blocks, &encoded_blocks, FLAGS_stat); - } else { // row format - reader.EncodeBlocksToRowFormat(encoded_out_file.get(), compression_type, - kv_pair_blocks, &encoded_blocks); - } - if (encoded_out_file != nullptr) { - uint64_t size = 0; - env->GetFileSize(FLAGS_encoded_file, &size); - fprintf(stdout, "File size: %" PRIu64 "\n", size); - } - uint64_t encode_time = sw.ElapsedNanosSafe(false /* reset */); - fprintf(stdout, "Encode time: %" PRIu64 "\n", encode_time); - if (decode) { - std::unique_ptr decoded_out_file; - if (!FLAGS_decoded_file.empty()) { - env->NewWritableFile(FLAGS_decoded_file, &decoded_out_file, - env_options); - } - sw.Start(); - if (FLAGS_format == "col") { - reader.DecodeBlocks(kvp_cd, decoded_out_file.get(), &encoded_blocks); - } else { - reader.DecodeBlocksFromRowFormat(decoded_out_file.get(), - &encoded_blocks); - } - uint64_t decode_time = sw.ElapsedNanosSafe(true /* reset */); - fprintf(stdout, "Decode time: %" PRIu64 "\n", decode_time); - } - } else { - fprintf(stdout, "Unsupported compression type: %s.\n", - FLAGS_compression_type.c_str()); - } - delete key_col_declarations; - delete value_col_declarations; - delete value_checksum_declaration; - } -}; - -} // namespace rocksdb - -int main(int argc, char** argv) { - int arg_idx = ParseCommandLineFlags(&argc, &argv, true); - if (arg_idx >= argc) { - fprintf(stdout, "SST filename required.\n"); - exit(1); - } - std::string sst_file(argv[arg_idx]); - if (FLAGS_format != "row" && FLAGS_format != "col") { - fprintf(stderr, "Format must be 'row' or 'col'\n"); - exit(1); - } - if (FLAGS_index_type != "primary" && FLAGS_index_type != "secondary") { - fprintf(stderr, "Format must be 'primary' or 'secondary'\n"); - exit(1); - } - rocksdb::ColumnAwareEncodingExp::Run(sst_file); - return 0; -} - -#else -int main() { - fprintf(stderr, "Please install gflags to run rocksdb tools\n"); - return 1; -} -#endif // GFLAGS -#else -int main(int /*argc*/, char** /*argv*/) { - fprintf(stderr, "Not supported in lite mode.\n"); - return 1; -} -#endif // ROCKSDB_LITE diff --git a/utilities/column_aware_encoding_test.cc b/utilities/column_aware_encoding_test.cc deleted file mode 100644 index b99ff563a..000000000 --- a/utilities/column_aware_encoding_test.cc +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#ifndef ROCKSDB_LITE - -#include -#include "util/testharness.h" -#include "util/testutil.h" -#include "utilities/col_buf_decoder.h" -#include "utilities/col_buf_encoder.h" - -namespace rocksdb { - -class ColumnAwareEncodingTest : public testing::Test { - public: - ColumnAwareEncodingTest() {} - - ~ColumnAwareEncodingTest() {} -}; - -class ColumnAwareEncodingTestWithSize - : public ColumnAwareEncodingTest, - public testing::WithParamInterface { - public: - ColumnAwareEncodingTestWithSize() {} - - ~ColumnAwareEncodingTestWithSize() {} - - static std::vector GetValues() { return {4, 8}; } -}; - -INSTANTIATE_TEST_CASE_P( - ColumnAwareEncodingTestWithSize, ColumnAwareEncodingTestWithSize, - ::testing::ValuesIn(ColumnAwareEncodingTestWithSize::GetValues())); - -TEST_P(ColumnAwareEncodingTestWithSize, NoCompressionEncodeDecode) { - size_t col_size = GetParam(); - std::unique_ptr col_buf_encoder( - new FixedLengthColBufEncoder(col_size, kColNoCompression, false, true)); - std::string str_buf; - uint64_t base_val = 0x0102030405060708; - uint64_t val = 0; - memcpy(&val, &base_val, col_size); - const int row_count = 4; - for (int i = 0; i < row_count; ++i) { - str_buf.append(reinterpret_cast(&val), col_size); - } - const char* str_buf_ptr = str_buf.c_str(); - for (int i = 0; i < row_count; ++i) { - col_buf_encoder->Append(str_buf_ptr); - } - col_buf_encoder->Finish(); - const std::string& encoded_data = col_buf_encoder->GetData(); - // Check correctness of encoded string length - ASSERT_EQ(row_count * col_size, encoded_data.size()); - - const char* encoded_data_ptr = encoded_data.c_str(); - uint64_t expected_encoded_val; - if (col_size == 8) { - expected_encoded_val = port::kLittleEndian ? 0x0807060504030201 : 0x0102030405060708; - } else if (col_size == 4) { - expected_encoded_val = port::kLittleEndian ? 0x08070605 : 0x0102030400000000; - } - uint64_t encoded_val = 0; - for (int i = 0; i < row_count; ++i) { - memcpy(&encoded_val, encoded_data_ptr, col_size); - // Check correctness of encoded value - ASSERT_EQ(expected_encoded_val, encoded_val); - encoded_data_ptr += col_size; - } - - std::unique_ptr col_buf_decoder( - new FixedLengthColBufDecoder(col_size, kColNoCompression, false, true)); - encoded_data_ptr = encoded_data.c_str(); - encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr); - char* decoded_data = new char[100]; - char* decoded_data_base = decoded_data; - for (int i = 0; i < row_count; ++i) { - encoded_data_ptr += - col_buf_decoder->Decode(encoded_data_ptr, &decoded_data); - } - - // Check correctness of decoded string length - ASSERT_EQ(row_count * col_size, decoded_data - decoded_data_base); - decoded_data = decoded_data_base; - for (int i = 0; i < row_count; ++i) { - uint64_t decoded_val; - decoded_val = 0; - memcpy(&decoded_val, decoded_data, col_size); - // Check correctness of decoded value - ASSERT_EQ(val, decoded_val); - decoded_data += col_size; - } - delete[] decoded_data_base; -} - -TEST_P(ColumnAwareEncodingTestWithSize, RleEncodeDecode) { - size_t col_size = GetParam(); - std::unique_ptr col_buf_encoder( - new FixedLengthColBufEncoder(col_size, kColRle, false, true)); - std::string str_buf; - uint64_t base_val = 0x0102030405060708; - uint64_t val = 0; - memcpy(&val, &base_val, col_size); - const int row_count = 4; - for (int i = 0; i < row_count; ++i) { - str_buf.append(reinterpret_cast(&val), col_size); - } - const char* str_buf_ptr = str_buf.c_str(); - for (int i = 0; i < row_count; ++i) { - str_buf_ptr += col_buf_encoder->Append(str_buf_ptr); - } - col_buf_encoder->Finish(); - const std::string& encoded_data = col_buf_encoder->GetData(); - // Check correctness of encoded string length - ASSERT_EQ(col_size + 1, encoded_data.size()); - - const char* encoded_data_ptr = encoded_data.c_str(); - uint64_t encoded_val = 0; - memcpy(&encoded_val, encoded_data_ptr, col_size); - uint64_t expected_encoded_val; - if (col_size == 8) { - expected_encoded_val = port::kLittleEndian ? 0x0807060504030201 : 0x0102030405060708; - } else if (col_size == 4) { - expected_encoded_val = port::kLittleEndian ? 0x08070605 : 0x0102030400000000; - } - // Check correctness of encoded value - ASSERT_EQ(expected_encoded_val, encoded_val); - - std::unique_ptr col_buf_decoder( - new FixedLengthColBufDecoder(col_size, kColRle, false, true)); - char* decoded_data = new char[100]; - char* decoded_data_base = decoded_data; - encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr); - for (int i = 0; i < row_count; ++i) { - encoded_data_ptr += - col_buf_decoder->Decode(encoded_data_ptr, &decoded_data); - } - // Check correctness of decoded string length - ASSERT_EQ(decoded_data - decoded_data_base, row_count * col_size); - decoded_data = decoded_data_base; - for (int i = 0; i < row_count; ++i) { - uint64_t decoded_val; - decoded_val = 0; - memcpy(&decoded_val, decoded_data, col_size); - // Check correctness of decoded value - ASSERT_EQ(val, decoded_val); - decoded_data += col_size; - } - delete[] decoded_data_base; -} - -TEST_P(ColumnAwareEncodingTestWithSize, DeltaEncodeDecode) { - size_t col_size = GetParam(); - int row_count = 4; - std::unique_ptr col_buf_encoder( - new FixedLengthColBufEncoder(col_size, kColDeltaVarint, false, true)); - std::string str_buf; - uint64_t base_val1 = port::kLittleEndian ? 0x0102030405060708 : 0x0807060504030201; - uint64_t base_val2 = port::kLittleEndian ? 0x0202030405060708 : 0x0807060504030202; - uint64_t val1 = 0, val2 = 0; - memcpy(&val1, &base_val1, col_size); - memcpy(&val2, &base_val2, col_size); - const char* str_buf_ptr; - for (int i = 0; i < row_count / 2; ++i) { - str_buf = std::string(reinterpret_cast(&val1), col_size); - str_buf_ptr = str_buf.c_str(); - col_buf_encoder->Append(str_buf_ptr); - - str_buf = std::string(reinterpret_cast(&val2), col_size); - str_buf_ptr = str_buf.c_str(); - col_buf_encoder->Append(str_buf_ptr); - } - col_buf_encoder->Finish(); - const std::string& encoded_data = col_buf_encoder->GetData(); - // Check encoded string length - int varint_len = 0; - if (col_size == 8) { - varint_len = 9; - } else if (col_size == 4) { - varint_len = port::kLittleEndian ? 5 : 9; - } - // Check encoded string length: first value is original one (val - 0), the - // coming three are encoded as 1, -1, 1, so they should take 1 byte in varint. - ASSERT_EQ(varint_len + 3 * 1, encoded_data.size()); - - std::unique_ptr col_buf_decoder( - new FixedLengthColBufDecoder(col_size, kColDeltaVarint, false, true)); - char* decoded_data = new char[100]; - char* decoded_data_base = decoded_data; - const char* encoded_data_ptr = encoded_data.c_str(); - encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr); - for (int i = 0; i < row_count; ++i) { - encoded_data_ptr += - col_buf_decoder->Decode(encoded_data_ptr, &decoded_data); - } - - // Check correctness of decoded string length - ASSERT_EQ(row_count * col_size, decoded_data - decoded_data_base); - decoded_data = decoded_data_base; - - // Check correctness of decoded data - for (int i = 0; i < row_count / 2; ++i) { - uint64_t decoded_val = 0; - memcpy(&decoded_val, decoded_data, col_size); - ASSERT_EQ(val1, decoded_val); - decoded_data += col_size; - memcpy(&decoded_val, decoded_data, col_size); - ASSERT_EQ(val2, decoded_val); - decoded_data += col_size; - } - delete[] decoded_data_base; -} - -TEST_F(ColumnAwareEncodingTest, ChunkBufEncodeDecode) { - std::unique_ptr col_buf_encoder( - new VariableChunkColBufEncoder(kColDict)); - std::string buf("12345678\377\1\0\0\0\0\0\0\0\376", 18); - col_buf_encoder->Append(buf.c_str()); - col_buf_encoder->Finish(); - const std::string& encoded_data = col_buf_encoder->GetData(); - const char* str_ptr = encoded_data.c_str(); - - std::unique_ptr col_buf_decoder( - new VariableChunkColBufDecoder(kColDict)); - str_ptr += col_buf_decoder->Init(str_ptr); - char* decoded_data = new char[100]; - char* decoded_data_base = decoded_data; - col_buf_decoder->Decode(str_ptr, &decoded_data); - for (size_t i = 0; i < buf.size(); ++i) { - ASSERT_EQ(buf[i], decoded_data_base[i]); - } - delete[] decoded_data_base; -} - -} // namespace rocksdb - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -#else - -#include - -int main() { - fprintf(stderr, - "SKIPPED as column aware encoding experiment is not enabled in " - "ROCKSDB_LITE\n"); -} -#endif // ROCKSDB_LITE diff --git a/utilities/column_aware_encoding_util.cc b/utilities/column_aware_encoding_util.cc deleted file mode 100644 index 222ee4680..000000000 --- a/utilities/column_aware_encoding_util.cc +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#ifndef ROCKSDB_LITE - -#include "utilities/column_aware_encoding_util.h" - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif - -#include -#include -#include -#include -#include -#include -#include "include/rocksdb/comparator.h" -#include "include/rocksdb/slice.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" -#include "table/block_based_table_builder.h" -#include "table/block_based_table_factory.h" -#include "table/format.h" -#include "table/table_reader.h" -#include "util/cast_util.h" -#include "util/coding.h" -#include "utilities/col_buf_decoder.h" -#include "utilities/col_buf_encoder.h" - -#include "port/port.h" - -namespace rocksdb { - -ColumnAwareEncodingReader::ColumnAwareEncodingReader( - const std::string& file_path) - : file_name_(file_path), - ioptions_(options_), - moptions_(options_), - internal_comparator_(BytewiseComparator()) { - InitTableReader(file_name_); -} - -void ColumnAwareEncodingReader::InitTableReader(const std::string& file_path) { - std::unique_ptr file; - uint64_t file_size; - options_.env->NewRandomAccessFile(file_path, &file, soptions_); - options_.env->GetFileSize(file_path, &file_size); - - file_.reset(new RandomAccessFileReader(std::move(file), file_path)); - - options_.comparator = &internal_comparator_; - options_.table_factory = std::make_shared(); - - std::unique_ptr table_reader; - options_.table_factory->NewTableReader( - TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(), soptions_, - internal_comparator_), - std::move(file_), file_size, &table_reader, /*enable_prefetch=*/false); - - table_reader_.reset(static_cast_with_check( - table_reader.release())); -} - -void ColumnAwareEncodingReader::GetKVPairsFromDataBlocks( - std::vector* kv_pair_blocks) { - table_reader_->GetKVPairsFromDataBlocks(kv_pair_blocks); -} - -void ColumnAwareEncodingReader::DecodeBlocks( - const KVPairColDeclarations& kvp_col_declarations, WritableFile* out_file, - const std::vector* blocks) { - char* decoded_content_base = new char[16384]; - Options options; - ImmutableCFOptions ioptions(options); - for (auto& block : *blocks) { - KVPairColBufDecoders kvp_col_bufs(kvp_col_declarations); - auto& key_col_bufs = kvp_col_bufs.key_col_bufs; - auto& value_col_bufs = kvp_col_bufs.value_col_bufs; - auto& value_checksum_buf = kvp_col_bufs.value_checksum_buf; - - auto& slice_final_with_bit = block; - uint32_t format_version = 2; - BlockContents contents; - const char* content_ptr; - - CompressionType type = - (CompressionType)slice_final_with_bit[slice_final_with_bit.size() - 1]; - if (type != kNoCompression) { - UncompressionContext uncompression_ctx(type); - UncompressBlockContents(uncompression_ctx, slice_final_with_bit.c_str(), - slice_final_with_bit.size() - 1, &contents, - format_version, ioptions); - content_ptr = contents.data.data(); - } else { - content_ptr = slice_final_with_bit.data(); - } - - size_t num_kv_pairs; - const char* header_content_ptr = content_ptr; - num_kv_pairs = static_cast(DecodeFixed64(header_content_ptr)); - - header_content_ptr += sizeof(size_t); - size_t num_key_columns = key_col_bufs.size(); - size_t num_value_columns = value_col_bufs.size(); - std::vector key_content_ptr(num_key_columns); - std::vector value_content_ptr(num_value_columns); - const char* checksum_content_ptr; - - size_t num_columns = num_key_columns + num_value_columns; - const char* col_content_ptr = - header_content_ptr + sizeof(size_t) * num_columns; - - // Read headers - for (size_t i = 0; i < num_key_columns; ++i) { - key_content_ptr[i] = col_content_ptr; - key_content_ptr[i] += key_col_bufs[i]->Init(key_content_ptr[i]); - size_t offset; - offset = static_cast(DecodeFixed64(header_content_ptr)); - header_content_ptr += sizeof(size_t); - col_content_ptr += offset; - } - for (size_t i = 0; i < num_value_columns; ++i) { - value_content_ptr[i] = col_content_ptr; - value_content_ptr[i] += value_col_bufs[i]->Init(value_content_ptr[i]); - size_t offset; - offset = static_cast(DecodeFixed64(header_content_ptr)); - header_content_ptr += sizeof(size_t); - col_content_ptr += offset; - } - checksum_content_ptr = col_content_ptr; - checksum_content_ptr += value_checksum_buf->Init(checksum_content_ptr); - - // Decode block - char* decoded_content = decoded_content_base; - for (size_t j = 0; j < num_kv_pairs; ++j) { - for (size_t i = 0; i < num_key_columns; ++i) { - key_content_ptr[i] += - key_col_bufs[i]->Decode(key_content_ptr[i], &decoded_content); - } - for (size_t i = 0; i < num_value_columns; ++i) { - value_content_ptr[i] += - value_col_bufs[i]->Decode(value_content_ptr[i], &decoded_content); - } - checksum_content_ptr += - value_checksum_buf->Decode(checksum_content_ptr, &decoded_content); - } - - size_t offset = decoded_content - decoded_content_base; - Slice output_content(decoded_content, offset); - - if (out_file != nullptr) { - out_file->Append(output_content); - } - } - delete[] decoded_content_base; -} - -void ColumnAwareEncodingReader::DecodeBlocksFromRowFormat( - WritableFile* out_file, const std::vector* blocks) { - Options options; - ImmutableCFOptions ioptions(options); - for (auto& block : *blocks) { - auto& slice_final_with_bit = block; - uint32_t format_version = 2; - BlockContents contents; - std::string decoded_content; - - CompressionType type = - (CompressionType)slice_final_with_bit[slice_final_with_bit.size() - 1]; - if (type != kNoCompression) { - UncompressionContext uncompression_ctx(type); - UncompressBlockContents(uncompression_ctx, slice_final_with_bit.c_str(), - slice_final_with_bit.size() - 1, &contents, - format_version, ioptions); - decoded_content = std::string(contents.data.data(), contents.data.size()); - } else { - decoded_content = std::move(slice_final_with_bit); - } - - if (out_file != nullptr) { - out_file->Append(decoded_content); - } - } -} - -void ColumnAwareEncodingReader::DumpDataColumns( - const std::string& filename, - const KVPairColDeclarations& kvp_col_declarations, - const std::vector& kv_pair_blocks) { - KVPairColBufEncoders kvp_col_bufs(kvp_col_declarations); - auto& key_col_bufs = kvp_col_bufs.key_col_bufs; - auto& value_col_bufs = kvp_col_bufs.value_col_bufs; - auto& value_checksum_buf = kvp_col_bufs.value_checksum_buf; - - FILE* fp = fopen(filename.c_str(), "w"); - size_t block_id = 1; - for (auto& kv_pairs : kv_pair_blocks) { - fprintf(fp, "---------------- Block: %-4" ROCKSDB_PRIszt " ----------------\n", block_id); - for (auto& kv_pair : kv_pairs) { - const auto& key = kv_pair.first; - const auto& value = kv_pair.second; - size_t value_offset = 0; - - const char* key_ptr = key.data(); - for (auto& buf : key_col_bufs) { - size_t col_size = buf->Append(key_ptr); - std::string tmp_buf(key_ptr, col_size); - Slice col(tmp_buf); - fprintf(fp, "%s ", col.ToString(true).c_str()); - key_ptr += col_size; - } - fprintf(fp, "|"); - - const char* value_ptr = value.data(); - for (auto& buf : value_col_bufs) { - size_t col_size = buf->Append(value_ptr); - std::string tmp_buf(value_ptr, col_size); - Slice col(tmp_buf); - fprintf(fp, " %s", col.ToString(true).c_str()); - value_ptr += col_size; - value_offset += col_size; - } - - if (value_offset < value.size()) { - size_t col_size = value_checksum_buf->Append(value_ptr); - std::string tmp_buf(value_ptr, col_size); - Slice col(tmp_buf); - fprintf(fp, "|%s", col.ToString(true).c_str()); - } else { - value_checksum_buf->Append(nullptr); - } - fprintf(fp, "\n"); - } - block_id++; - } - fclose(fp); -} - -namespace { - -void CompressDataBlock(const std::string& output_content, Slice* slice_final, - CompressionType* type, std::string* compressed_output) { - CompressionContext compression_ctx(*type); - uint32_t format_version = 2; // hard-coded version - *slice_final = CompressBlock(output_content, compression_ctx, type, - format_version, compressed_output); -} - -} // namespace - -void ColumnAwareEncodingReader::EncodeBlocksToRowFormat( - WritableFile* out_file, CompressionType compression_type, - const std::vector& kv_pair_blocks, - std::vector* blocks) { - std::string output_content; - for (auto& kv_pairs : kv_pair_blocks) { - output_content.clear(); - std::string last_key; - size_t counter = 0; - const size_t block_restart_interval = 16; - for (auto& kv_pair : kv_pairs) { - const auto& key = kv_pair.first; - const auto& value = kv_pair.second; - - Slice last_key_piece(last_key); - size_t shared = 0; - if (counter >= block_restart_interval) { - counter = 0; - } else { - const size_t min_length = std::min(last_key_piece.size(), key.size()); - while ((shared < min_length) && last_key_piece[shared] == key[shared]) { - shared++; - } - } - const size_t non_shared = key.size() - shared; - output_content.append(key.c_str() + shared, non_shared); - output_content.append(value); - - last_key.resize(shared); - last_key.append(key.data() + shared, non_shared); - counter++; - } - Slice slice_final; - auto type = compression_type; - std::string compressed_output; - CompressDataBlock(output_content, &slice_final, &type, &compressed_output); - - if (out_file != nullptr) { - out_file->Append(slice_final); - } - - // Add a bit in the end for decoding - std::string slice_final_with_bit(slice_final.data(), slice_final.size()); - slice_final_with_bit.append(reinterpret_cast(&type), 1); - blocks->push_back( - std::string(slice_final_with_bit.data(), slice_final_with_bit.size())); - } -} - -Status ColumnAwareEncodingReader::EncodeBlocks( - const KVPairColDeclarations& kvp_col_declarations, WritableFile* out_file, - CompressionType compression_type, - const std::vector& kv_pair_blocks, - std::vector* blocks, bool print_column_stat) { - std::vector key_col_sizes( - kvp_col_declarations.key_col_declarations->size(), 0); - std::vector value_col_sizes( - kvp_col_declarations.value_col_declarations->size(), 0); - size_t value_checksum_size = 0; - - for (auto& kv_pairs : kv_pair_blocks) { - KVPairColBufEncoders kvp_col_bufs(kvp_col_declarations); - auto& key_col_bufs = kvp_col_bufs.key_col_bufs; - auto& value_col_bufs = kvp_col_bufs.value_col_bufs; - auto& value_checksum_buf = kvp_col_bufs.value_checksum_buf; - - size_t num_kv_pairs = 0; - for (auto& kv_pair : kv_pairs) { - const auto& key = kv_pair.first; - const auto& value = kv_pair.second; - size_t value_offset = 0; - num_kv_pairs++; - - const char* key_ptr = key.data(); - for (auto& buf : key_col_bufs) { - size_t col_size = buf->Append(key_ptr); - key_ptr += col_size; - } - - const char* value_ptr = value.data(); - for (auto& buf : value_col_bufs) { - size_t col_size = buf->Append(value_ptr); - value_ptr += col_size; - value_offset += col_size; - } - - if (value_offset < value.size()) { - value_checksum_buf->Append(value_ptr); - } else { - value_checksum_buf->Append(nullptr); - } - } - - kvp_col_bufs.Finish(); - // Get stats - // Compress and write a block - if (print_column_stat) { - for (size_t i = 0; i < key_col_bufs.size(); ++i) { - Slice slice_final; - auto type = compression_type; - std::string compressed_output; - CompressDataBlock(key_col_bufs[i]->GetData(), &slice_final, &type, - &compressed_output); - out_file->Append(slice_final); - key_col_sizes[i] += slice_final.size(); - } - for (size_t i = 0; i < value_col_bufs.size(); ++i) { - Slice slice_final; - auto type = compression_type; - std::string compressed_output; - CompressDataBlock(value_col_bufs[i]->GetData(), &slice_final, &type, - &compressed_output); - out_file->Append(slice_final); - value_col_sizes[i] += slice_final.size(); - } - Slice slice_final; - auto type = compression_type; - std::string compressed_output; - CompressDataBlock(value_checksum_buf->GetData(), &slice_final, &type, - &compressed_output); - out_file->Append(slice_final); - value_checksum_size += slice_final.size(); - } else { - std::string output_content; - // Write column sizes - PutFixed64(&output_content, num_kv_pairs); - for (auto& buf : key_col_bufs) { - size_t size = buf->GetData().size(); - PutFixed64(&output_content, size); - } - for (auto& buf : value_col_bufs) { - size_t size = buf->GetData().size(); - PutFixed64(&output_content, size); - } - // Write data - for (auto& buf : key_col_bufs) { - output_content.append(buf->GetData()); - } - for (auto& buf : value_col_bufs) { - output_content.append(buf->GetData()); - } - output_content.append(value_checksum_buf->GetData()); - - Slice slice_final; - auto type = compression_type; - std::string compressed_output; - CompressDataBlock(output_content, &slice_final, &type, - &compressed_output); - - if (out_file != nullptr) { - out_file->Append(slice_final); - } - - // Add a bit in the end for decoding - std::string slice_final_with_bit(slice_final.data(), - slice_final.size() + 1); - slice_final_with_bit[slice_final.size()] = static_cast(type); - blocks->push_back(std::string(slice_final_with_bit.data(), - slice_final_with_bit.size())); - } - } - - if (print_column_stat) { - size_t total_size = 0; - for (size_t i = 0; i < key_col_sizes.size(); ++i) - total_size += key_col_sizes[i]; - for (size_t i = 0; i < value_col_sizes.size(); ++i) - total_size += value_col_sizes[i]; - total_size += value_checksum_size; - - for (size_t i = 0; i < key_col_sizes.size(); ++i) - printf("Key col %" ROCKSDB_PRIszt " size: %" ROCKSDB_PRIszt " percentage %lf%%\n", i, key_col_sizes[i], - 100.0 * key_col_sizes[i] / total_size); - for (size_t i = 0; i < value_col_sizes.size(); ++i) - printf("Value col %" ROCKSDB_PRIszt " size: %" ROCKSDB_PRIszt " percentage %lf%%\n", i, - value_col_sizes[i], 100.0 * value_col_sizes[i] / total_size); - printf("Value checksum size: %" ROCKSDB_PRIszt " percentage %lf%%\n", value_checksum_size, - 100.0 * value_checksum_size / total_size); - } - return Status::OK(); -} - -void ColumnAwareEncodingReader::GetColDeclarationsPrimary( - std::vector** key_col_declarations, - std::vector** value_col_declarations, - ColDeclaration** value_checksum_declaration) { - *key_col_declarations = new std::vector{ - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 4, false, - true), - ColDeclaration("FixedLength", ColCompressionType::kColRleDeltaVarint, 8, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColDeltaVarint, 8, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColDeltaVarint, 8, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 8)}; - - *value_col_declarations = new std::vector{ - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 4), - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 4), - ColDeclaration("FixedLength", ColCompressionType::kColRle, 1), - ColDeclaration("VariableLength"), - ColDeclaration("FixedLength", ColCompressionType::kColDeltaVarint, 4), - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 8)}; - *value_checksum_declaration = new ColDeclaration( - "LongFixedLength", ColCompressionType::kColNoCompression, 9, - true /* nullable */); -} - -void ColumnAwareEncodingReader::GetColDeclarationsSecondary( - std::vector** key_col_declarations, - std::vector** value_col_declarations, - ColDeclaration** value_checksum_declaration) { - *key_col_declarations = new std::vector{ - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 4, false, - true), - ColDeclaration("FixedLength", ColCompressionType::kColDeltaVarint, 8, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColRleDeltaVarint, 8, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColRle, 1), - ColDeclaration("FixedLength", ColCompressionType::kColDeltaVarint, 4, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColDeltaVarint, 8, - false, true), - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 8, false, - true), - ColDeclaration("VariableChunk", ColCompressionType::kColNoCompression), - ColDeclaration("FixedLength", ColCompressionType::kColRleVarint, 8)}; - *value_col_declarations = new std::vector(); - *value_checksum_declaration = new ColDeclaration( - "LongFixedLength", ColCompressionType::kColNoCompression, 9, - true /* nullable */); -} - -} // namespace rocksdb - -#endif // ROCKSDB_LITE diff --git a/utilities/column_aware_encoding_util.h b/utilities/column_aware_encoding_util.h deleted file mode 100644 index c2c4fa2d6..000000000 --- a/utilities/column_aware_encoding_util.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -#pragma once -#ifndef ROCKSDB_LITE - -#include -#include -#include "db/dbformat.h" -#include "include/rocksdb/env.h" -#include "include/rocksdb/listener.h" -#include "include/rocksdb/options.h" -#include "include/rocksdb/status.h" -#include "options/cf_options.h" -#include "table/block_based_table_reader.h" - -namespace rocksdb { - -struct ColDeclaration; -struct KVPairColDeclarations; - -class ColumnAwareEncodingReader { - public: - explicit ColumnAwareEncodingReader(const std::string& file_name); - - void GetKVPairsFromDataBlocks(std::vector* kv_pair_blocks); - - void EncodeBlocksToRowFormat(WritableFile* out_file, - CompressionType compression_type, - const std::vector& kv_pair_blocks, - std::vector* blocks); - - void DecodeBlocksFromRowFormat(WritableFile* out_file, - const std::vector* blocks); - - void DumpDataColumns(const std::string& filename, - const KVPairColDeclarations& kvp_col_declarations, - const std::vector& kv_pair_blocks); - - Status EncodeBlocks(const KVPairColDeclarations& kvp_col_declarations, - WritableFile* out_file, CompressionType compression_type, - const std::vector& kv_pair_blocks, - std::vector* blocks, bool print_column_stat); - - void DecodeBlocks(const KVPairColDeclarations& kvp_col_declarations, - WritableFile* out_file, - const std::vector* blocks); - - static void GetColDeclarationsPrimary( - std::vector** key_col_declarations, - std::vector** value_col_declarations, - ColDeclaration** value_checksum_declaration); - - static void GetColDeclarationsSecondary( - std::vector** key_col_declarations, - std::vector** value_col_declarations, - ColDeclaration** value_checksum_declaration); - - private: - // Init the TableReader for the sst file - void InitTableReader(const std::string& file_path); - - std::string file_name_; - EnvOptions soptions_; - - Options options_; - - Status init_result_; - std::unique_ptr table_reader_; - std::unique_ptr file_; - - const ImmutableCFOptions ioptions_; - const MutableCFOptions moptions_; - InternalKeyComparator internal_comparator_; - std::unique_ptr table_properties_; -}; - -} // namespace rocksdb - -#endif // ROCKSDB_LITE diff --git a/utilities/date_tiered/date_tiered_db_impl.cc b/utilities/date_tiered/date_tiered_db_impl.cc deleted file mode 100644 index 2574d379f..000000000 --- a/utilities/date_tiered/date_tiered_db_impl.cc +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef ROCKSDB_LITE - -#include "utilities/date_tiered/date_tiered_db_impl.h" - -#include - -#include "db/db_impl.h" -#include "db/db_iter.h" -#include "db/write_batch_internal.h" -#include "monitoring/instrumented_mutex.h" -#include "options/options_helper.h" -#include "rocksdb/convenience.h" -#include "rocksdb/env.h" -#include "rocksdb/iterator.h" -#include "rocksdb/utilities/date_tiered_db.h" -#include "table/merging_iterator.h" -#include "util/coding.h" -#include "util/filename.h" -#include "util/string_util.h" - -namespace rocksdb { - -// Open the db inside DateTieredDBImpl because options needs pointer to its ttl -DateTieredDBImpl::DateTieredDBImpl( - DB* db, Options options, - const std::vector& descriptors, - const std::vector& handles, int64_t ttl, - int64_t column_family_interval) - : db_(db), - cf_options_(ColumnFamilyOptions(options)), - ioptions_(ImmutableCFOptions(options)), - moptions_(MutableCFOptions(options)), - icomp_(cf_options_.comparator), - ttl_(ttl), - column_family_interval_(column_family_interval), - mutex_(options.statistics.get(), db->GetEnv(), DB_MUTEX_WAIT_MICROS, - options.use_adaptive_mutex) { - latest_timebound_ = std::numeric_limits::min(); - for (size_t i = 0; i < handles.size(); ++i) { - const auto& name = descriptors[i].name; - int64_t timestamp = 0; - try { - timestamp = ParseUint64(name); - } catch (const std::invalid_argument&) { - // Bypass unrelated column family, e.g. default - db_->DestroyColumnFamilyHandle(handles[i]); - continue; - } - if (timestamp > latest_timebound_) { - latest_timebound_ = timestamp; - } - handle_map_.insert(std::make_pair(timestamp, handles[i])); - } -} - -DateTieredDBImpl::~DateTieredDBImpl() { - for (auto handle : handle_map_) { - db_->DestroyColumnFamilyHandle(handle.second); - } - delete db_; - db_ = nullptr; -} - -Status DateTieredDB::Open(const Options& options, const std::string& dbname, - DateTieredDB** dbptr, int64_t ttl, - int64_t column_family_interval, bool read_only) { - DBOptions db_options(options); - ColumnFamilyOptions cf_options(options); - std::vector descriptors; - std::vector handles; - DB* db; - Status s; - - // Get column families - std::vector column_family_names; - s = DB::ListColumnFamilies(db_options, dbname, &column_family_names); - if (!s.ok()) { - // No column family found. Use default - s = DB::Open(options, dbname, &db); - if (!s.ok()) { - return s; - } - } else { - for (auto name : column_family_names) { - descriptors.emplace_back(ColumnFamilyDescriptor(name, cf_options)); - } - - // Open database - if (read_only) { - s = DB::OpenForReadOnly(db_options, dbname, descriptors, &handles, &db); - } else { - s = DB::Open(db_options, dbname, descriptors, &handles, &db); - } - } - - if (s.ok()) { - *dbptr = new DateTieredDBImpl(db, options, descriptors, handles, ttl, - column_family_interval); - } - return s; -} - -// Checks if the string is stale or not according to TTl provided -bool DateTieredDBImpl::IsStale(int64_t keytime, int64_t ttl, Env* env) { - if (ttl <= 0) { - // Data is fresh if TTL is non-positive - return false; - } - int64_t curtime; - if (!env->GetCurrentTime(&curtime).ok()) { - // Treat the data as fresh if could not get current time - return false; - } - return curtime >= keytime + ttl; -} - -// Drop column family when all data in that column family is expired -// TODO(jhli): Can be made a background job -Status DateTieredDBImpl::DropObsoleteColumnFamilies() { - int64_t curtime; - Status s; - s = db_->GetEnv()->GetCurrentTime(&curtime); - if (!s.ok()) { - return s; - } - { - InstrumentedMutexLock l(&mutex_); - auto iter = handle_map_.begin(); - while (iter != handle_map_.end()) { - if (iter->first <= curtime - ttl_) { - s = db_->DropColumnFamily(iter->second); - if (!s.ok()) { - return s; - } - delete iter->second; - iter = handle_map_.erase(iter); - } else { - break; - } - } - } - return Status::OK(); -} - -// Get timestamp from user key -Status DateTieredDBImpl::GetTimestamp(const Slice& key, int64_t* result) { - if (key.size() < kTSLength) { - return Status::Corruption("Bad timestamp in key"); - } - const char* pos = key.data() + key.size() - 8; - int64_t timestamp = 0; - if (port::kLittleEndian) { - int bytes_to_fill = 8; - for (int i = 0; i < bytes_to_fill; ++i) { - timestamp |= (static_cast(static_cast(pos[i])) - << ((bytes_to_fill - i - 1) << 3)); - } - } else { - memcpy(×tamp, pos, sizeof(timestamp)); - } - *result = timestamp; - return Status::OK(); -} - -Status DateTieredDBImpl::CreateColumnFamily( - ColumnFamilyHandle** column_family) { - int64_t curtime; - Status s; - mutex_.AssertHeld(); - s = db_->GetEnv()->GetCurrentTime(&curtime); - if (!s.ok()) { - return s; - } - int64_t new_timebound; - if (handle_map_.empty()) { - new_timebound = curtime + column_family_interval_; - } else { - new_timebound = - latest_timebound_ + - ((curtime - latest_timebound_) / column_family_interval_ + 1) * - column_family_interval_; - } - std::string cf_name = ToString(new_timebound); - latest_timebound_ = new_timebound; - s = db_->CreateColumnFamily(cf_options_, cf_name, column_family); - if (s.ok()) { - handle_map_.insert(std::make_pair(new_timebound, *column_family)); - } - return s; -} - -Status DateTieredDBImpl::FindColumnFamily(int64_t keytime, - ColumnFamilyHandle** column_family, - bool create_if_missing) { - *column_family = nullptr; - { - InstrumentedMutexLock l(&mutex_); - auto iter = handle_map_.upper_bound(keytime); - if (iter == handle_map_.end()) { - if (!create_if_missing) { - return Status::NotFound(); - } else { - return CreateColumnFamily(column_family); - } - } - // Move to previous element to get the appropriate time window - *column_family = iter->second; - } - return Status::OK(); -} - -Status DateTieredDBImpl::Put(const WriteOptions& options, const Slice& key, - const Slice& val) { - int64_t timestamp = 0; - Status s; - s = GetTimestamp(key, ×tamp); - if (!s.ok()) { - return s; - } - DropObsoleteColumnFamilies(); - - // Prune request to obsolete data - if (IsStale(timestamp, ttl_, db_->GetEnv())) { - return Status::InvalidArgument(); - } - - // Decide column family (i.e. the time window) to put into - ColumnFamilyHandle* column_family; - s = FindColumnFamily(timestamp, &column_family, true /*create_if_missing*/); - if (!s.ok()) { - return s; - } - - // Efficiently put with WriteBatch - WriteBatch batch; - batch.Put(column_family, key, val); - return Write(options, &batch); -} - -Status DateTieredDBImpl::Get(const ReadOptions& options, const Slice& key, - std::string* value) { - int64_t timestamp = 0; - Status s; - s = GetTimestamp(key, ×tamp); - if (!s.ok()) { - return s; - } - // Prune request to obsolete data - if (IsStale(timestamp, ttl_, db_->GetEnv())) { - return Status::NotFound(); - } - - // Decide column family to get from - ColumnFamilyHandle* column_family; - s = FindColumnFamily(timestamp, &column_family, false /*create_if_missing*/); - if (!s.ok()) { - return s; - } - if (column_family == nullptr) { - // Cannot find column family - return Status::NotFound(); - } - - // Get value with key - return db_->Get(options, column_family, key, value); -} - -bool DateTieredDBImpl::KeyMayExist(const ReadOptions& options, const Slice& key, - std::string* value, bool* value_found) { - int64_t timestamp = 0; - Status s; - s = GetTimestamp(key, ×tamp); - if (!s.ok()) { - // Cannot get current time - return false; - } - // Decide column family to get from - ColumnFamilyHandle* column_family; - s = FindColumnFamily(timestamp, &column_family, false /*create_if_missing*/); - if (!s.ok() || column_family == nullptr) { - // Cannot find column family - return false; - } - if (IsStale(timestamp, ttl_, db_->GetEnv())) { - return false; - } - return db_->KeyMayExist(options, column_family, key, value, value_found); -} - -Status DateTieredDBImpl::Delete(const WriteOptions& options, const Slice& key) { - int64_t timestamp = 0; - Status s; - s = GetTimestamp(key, ×tamp); - if (!s.ok()) { - return s; - } - DropObsoleteColumnFamilies(); - // Prune request to obsolete data - if (IsStale(timestamp, ttl_, db_->GetEnv())) { - return Status::NotFound(); - } - - // Decide column family to get from - ColumnFamilyHandle* column_family; - s = FindColumnFamily(timestamp, &column_family, false /*create_if_missing*/); - if (!s.ok()) { - return s; - } - if (column_family == nullptr) { - // Cannot find column family - return Status::NotFound(); - } - - // Get value with key - return db_->Delete(options, column_family, key); -} - -Status DateTieredDBImpl::Merge(const WriteOptions& options, const Slice& key, - const Slice& value) { - // Decide column family to get from - int64_t timestamp = 0; - Status s; - s = GetTimestamp(key, ×tamp); - if (!s.ok()) { - // Cannot get current time - return s; - } - ColumnFamilyHandle* column_family; - s = FindColumnFamily(timestamp, &column_family, true /*create_if_missing*/); - if (!s.ok()) { - return s; - } - WriteBatch batch; - batch.Merge(column_family, key, value); - return Write(options, &batch); -} - -Status DateTieredDBImpl::Write(const WriteOptions& opts, WriteBatch* updates) { - class Handler : public WriteBatch::Handler { - public: - explicit Handler() {} - WriteBatch updates_ttl; - Status batch_rewrite_status; - virtual Status PutCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - WriteBatchInternal::Put(&updates_ttl, column_family_id, key, value); - return Status::OK(); - } - virtual Status MergeCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - WriteBatchInternal::Merge(&updates_ttl, column_family_id, key, value); - return Status::OK(); - } - virtual Status DeleteCF(uint32_t column_family_id, - const Slice& key) override { - WriteBatchInternal::Delete(&updates_ttl, column_family_id, key); - return Status::OK(); - } - virtual void LogData(const Slice& blob) override { - updates_ttl.PutLogData(blob); - } - }; - Handler handler; - updates->Iterate(&handler); - if (!handler.batch_rewrite_status.ok()) { - return handler.batch_rewrite_status; - } else { - return db_->Write(opts, &(handler.updates_ttl)); - } -} - -Iterator* DateTieredDBImpl::NewIterator(const ReadOptions& opts) { - if (handle_map_.empty()) { - return NewEmptyIterator(); - } - - DBImpl* db_impl = reinterpret_cast(db_); - - auto db_iter = NewArenaWrappedDbIterator( - db_impl->GetEnv(), opts, ioptions_, moptions_, kMaxSequenceNumber, - cf_options_.max_sequential_skip_in_iterations, 0, - nullptr /*read_callback*/); - - auto arena = db_iter->GetArena(); - MergeIteratorBuilder builder(&icomp_, arena); - for (auto& item : handle_map_) { - auto handle = item.second; - builder.AddIterator(db_impl->NewInternalIterator( - arena, db_iter->GetRangeDelAggregator(), kMaxSequenceNumber, handle)); - } - auto internal_iter = builder.Finish(); - db_iter->SetIterUnderDBIter(internal_iter); - return db_iter; -} -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/date_tiered/date_tiered_db_impl.h b/utilities/date_tiered/date_tiered_db_impl.h deleted file mode 100644 index 7a6a6b75a..000000000 --- a/utilities/date_tiered/date_tiered_db_impl.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#ifndef ROCKSDB_LITE - -#include -#include -#include - -#include "monitoring/instrumented_mutex.h" -#include "options/cf_options.h" -#include "rocksdb/db.h" -#include "rocksdb/utilities/date_tiered_db.h" - -namespace rocksdb { - -// Implementation of DateTieredDB. -class DateTieredDBImpl : public DateTieredDB { - public: - DateTieredDBImpl(DB* db, Options options, - const std::vector& descriptors, - const std::vector& handles, int64_t ttl, - int64_t column_family_interval); - - virtual ~DateTieredDBImpl(); - - Status Put(const WriteOptions& options, const Slice& key, - const Slice& val) override; - - Status Get(const ReadOptions& options, const Slice& key, - std::string* value) override; - - Status Delete(const WriteOptions& options, const Slice& key) override; - - bool KeyMayExist(const ReadOptions& options, const Slice& key, - std::string* value, bool* value_found = nullptr) override; - - Status Merge(const WriteOptions& options, const Slice& key, - const Slice& value) override; - - Iterator* NewIterator(const ReadOptions& opts) override; - - Status DropObsoleteColumnFamilies() override; - - // Extract timestamp from key. - static Status GetTimestamp(const Slice& key, int64_t* result); - - private: - // Base database object - DB* db_; - - const ColumnFamilyOptions cf_options_; - - const ImmutableCFOptions ioptions_; - - const MutableCFOptions moptions_; - - const InternalKeyComparator icomp_; - - // Storing all column family handles for time series data. - std::vector handles_; - - // Manages a mapping from a column family's maximum timestamp to its handle. - std::map handle_map_; - - // A time-to-live value to indicate when the data should be removed. - int64_t ttl_; - - // An variable to indicate the time range of a column family. - int64_t column_family_interval_; - - // Indicate largest maximum timestamp of a column family. - int64_t latest_timebound_; - - // Mutex to protect handle_map_ operations. - InstrumentedMutex mutex_; - - // Internal method to execute Put and Merge in batch. - Status Write(const WriteOptions& opts, WriteBatch* updates); - - Status CreateColumnFamily(ColumnFamilyHandle** column_family); - - Status FindColumnFamily(int64_t keytime, ColumnFamilyHandle** column_family, - bool create_if_missing); - - static bool IsStale(int64_t keytime, int64_t ttl, Env* env); -}; - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/date_tiered/date_tiered_test.cc b/utilities/date_tiered/date_tiered_test.cc deleted file mode 100644 index 35f15584e..000000000 --- a/utilities/date_tiered/date_tiered_test.cc +++ /dev/null @@ -1,469 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef ROCKSDB_LITE - -#ifndef OS_WIN -#include -#endif - -#include -#include - -#include "rocksdb/compaction_filter.h" -#include "rocksdb/utilities/date_tiered_db.h" -#include "port/port.h" -#include "util/logging.h" -#include "util/string_util.h" -#include "util/testharness.h" - -namespace rocksdb { - -namespace { - -typedef std::map KVMap; -} - -class SpecialTimeEnv : public EnvWrapper { - public: - explicit SpecialTimeEnv(Env* base) : EnvWrapper(base) { - base->GetCurrentTime(¤t_time_); - } - - void Sleep(int64_t sleep_time) { current_time_ += sleep_time; } - virtual Status GetCurrentTime(int64_t* current_time) override { - *current_time = current_time_; - return Status::OK(); - } - - private: - int64_t current_time_ = 0; -}; - -class DateTieredTest : public testing::Test { - public: - DateTieredTest() { - env_.reset(new SpecialTimeEnv(Env::Default())); - dbname_ = test::PerThreadDBPath("date_tiered"); - options_.create_if_missing = true; - options_.env = env_.get(); - date_tiered_db_.reset(nullptr); - DestroyDB(dbname_, Options()); - } - - ~DateTieredTest() { - CloseDateTieredDB(); - DestroyDB(dbname_, Options()); - } - - void OpenDateTieredDB(int64_t ttl, int64_t column_family_interval, - bool read_only = false) { - ASSERT_TRUE(date_tiered_db_.get() == nullptr); - DateTieredDB* date_tiered_db = nullptr; - ASSERT_OK(DateTieredDB::Open(options_, dbname_, &date_tiered_db, ttl, - column_family_interval, read_only)); - date_tiered_db_.reset(date_tiered_db); - } - - void CloseDateTieredDB() { date_tiered_db_.reset(nullptr); } - - Status AppendTimestamp(std::string* key) { - char ts[8]; - int bytes_to_fill = 8; - int64_t timestamp_value = 0; - Status s = env_->GetCurrentTime(×tamp_value); - if (!s.ok()) { - return s; - } - if (port::kLittleEndian) { - for (int i = 0; i < bytes_to_fill; ++i) { - ts[i] = (timestamp_value >> ((bytes_to_fill - i - 1) << 3)) & 0xFF; - } - } else { - memcpy(ts, static_cast(×tamp_value), bytes_to_fill); - } - key->append(ts, 8); - return Status::OK(); - } - - // Populates and returns a kv-map - void MakeKVMap(int64_t num_entries, KVMap* kvmap) { - kvmap->clear(); - int digits = 1; - for (int64_t dummy = num_entries; dummy /= 10; ++digits) { - } - int digits_in_i = 1; - for (int64_t i = 0; i < num_entries; i++) { - std::string key = "key"; - std::string value = "value"; - if (i % 10 == 0) { - digits_in_i++; - } - for (int j = digits_in_i; j < digits; j++) { - key.append("0"); - value.append("0"); - } - AppendNumberTo(&key, i); - AppendNumberTo(&value, i); - ASSERT_OK(AppendTimestamp(&key)); - (*kvmap)[key] = value; - } - // check all insertions done - ASSERT_EQ(num_entries, static_cast(kvmap->size())); - } - - size_t GetColumnFamilyCount() { - DBOptions db_options(options_); - std::vector cf; - DB::ListColumnFamilies(db_options, dbname_, &cf); - return cf.size(); - } - - void Sleep(int64_t sleep_time) { env_->Sleep(sleep_time); } - - static const int64_t kSampleSize_ = 100; - std::string dbname_; - std::unique_ptr date_tiered_db_; - std::unique_ptr env_; - KVMap kvmap_; - - private: - Options options_; - KVMap::iterator kv_it_; - const std::string kNewValue_ = "new_value"; - std::unique_ptr test_comp_filter_; -}; - -// Puts a set of values and checks its presence using Get during ttl -TEST_F(DateTieredTest, KeyLifeCycle) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(2, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - - // Put data in database - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - - Sleep(1); - // T=1, keys should still reside in database - for (auto& kv : map_insert) { - std::string value; - ASSERT_OK(date_tiered_db_->Get(ropts, kv.first, &value)); - ASSERT_EQ(value, kv.second); - } - - Sleep(1); - // T=2, keys should not be retrieved - for (auto& kv : map_insert) { - std::string value; - auto s = date_tiered_db_->Get(ropts, kv.first, &value); - ASSERT_TRUE(s.IsNotFound()); - } - - CloseDateTieredDB(); -} - -TEST_F(DateTieredTest, DeleteTest) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(2, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - - // Put data in database - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - - Sleep(1); - // Delete keys when they are not obsolete - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Delete(wopts, kv.first)); - } - - // Key should not be found - for (auto& kv : map_insert) { - std::string value; - auto s = date_tiered_db_->Get(ropts, kv.first, &value); - ASSERT_TRUE(s.IsNotFound()); - } -} - -TEST_F(DateTieredTest, KeyMayExistTest) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(2, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - - // Put data in database - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - - Sleep(1); - // T=1, keys should still reside in database - for (auto& kv : map_insert) { - std::string value; - ASSERT_TRUE(date_tiered_db_->KeyMayExist(ropts, kv.first, &value)); - ASSERT_EQ(value, kv.second); - } -} - -// Database open and close should not affect -TEST_F(DateTieredTest, MultiOpen) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(4, 4); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - - // Put data in database - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - CloseDateTieredDB(); - - Sleep(1); - OpenDateTieredDB(2, 2); - // T=1, keys should still reside in database - for (auto& kv : map_insert) { - std::string value; - ASSERT_OK(date_tiered_db_->Get(ropts, kv.first, &value)); - ASSERT_EQ(value, kv.second); - } - - Sleep(1); - // T=2, keys should not be retrieved - for (auto& kv : map_insert) { - std::string value; - auto s = date_tiered_db_->Get(ropts, kv.first, &value); - ASSERT_TRUE(s.IsNotFound()); - } - - CloseDateTieredDB(); -} - -// If the key in Put() is obsolete, the data should not be written into database -TEST_F(DateTieredTest, InsertObsoleteDate) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(2, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - - Sleep(2); - // T=2, keys put into database are already obsolete - // Put data in database. Operations should not return OK - for (auto& kv : map_insert) { - auto s = date_tiered_db_->Put(wopts, kv.first, kv.second); - ASSERT_TRUE(s.IsInvalidArgument()); - } - - // Data should not be found in database - for (auto& kv : map_insert) { - std::string value; - auto s = date_tiered_db_->Get(ropts, kv.first, &value); - ASSERT_TRUE(s.IsNotFound()); - } - - CloseDateTieredDB(); -} - -// Resets the timestamp of a set of kvs by updating them and checks that they -// are not deleted according to the old timestamp -TEST_F(DateTieredTest, ColumnFamilyCounts) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(4, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - // Only default column family - ASSERT_EQ(1, GetColumnFamilyCount()); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - // A time series column family is created - ASSERT_EQ(2, GetColumnFamilyCount()); - - Sleep(2); - KVMap map_insert2; - MakeKVMap(kSampleSize_, &map_insert2); - for (auto& kv : map_insert2) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - // Another time series column family is created - ASSERT_EQ(3, GetColumnFamilyCount()); - - Sleep(4); - - // Data should not be found in database - for (auto& kv : map_insert) { - std::string value; - auto s = date_tiered_db_->Get(ropts, kv.first, &value); - ASSERT_TRUE(s.IsNotFound()); - } - - // Explicitly drop obsolete column families - date_tiered_db_->DropObsoleteColumnFamilies(); - - // The first column family is deleted from database - ASSERT_EQ(2, GetColumnFamilyCount()); - - CloseDateTieredDB(); -} - -// Puts a set of values and checks its presence using iterator during ttl -TEST_F(DateTieredTest, IteratorLifeCycle) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(2, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - // Create key value pairs to insert - KVMap map_insert; - MakeKVMap(kSampleSize_, &map_insert); - Iterator* dbiter; - - // Put data in database - for (auto& kv : map_insert) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - - Sleep(1); - ASSERT_EQ(2, GetColumnFamilyCount()); - // T=1, keys should still reside in database - dbiter = date_tiered_db_->NewIterator(ropts); - dbiter->SeekToFirst(); - for (auto& kv : map_insert) { - ASSERT_TRUE(dbiter->Valid()); - ASSERT_EQ(0, dbiter->value().compare(kv.second)); - dbiter->Next(); - } - delete dbiter; - - Sleep(4); - // T=5, keys should not be retrieved - for (auto& kv : map_insert) { - std::string value; - auto s = date_tiered_db_->Get(ropts, kv.first, &value); - ASSERT_TRUE(s.IsNotFound()); - } - - // Explicitly drop obsolete column families - date_tiered_db_->DropObsoleteColumnFamilies(); - - // Only default column family - ASSERT_EQ(1, GetColumnFamilyCount()); - - // Empty iterator - dbiter = date_tiered_db_->NewIterator(ropts); - dbiter->Seek(map_insert.begin()->first); - ASSERT_FALSE(dbiter->Valid()); - delete dbiter; - - CloseDateTieredDB(); -} - -// Iterator should be able to merge data from multiple column families -TEST_F(DateTieredTest, IteratorMerge) { - WriteOptions wopts; - ReadOptions ropts; - - // T=0, open the database and insert data - OpenDateTieredDB(4, 2); - ASSERT_TRUE(date_tiered_db_.get() != nullptr); - - Iterator* dbiter; - - // Put data in database - KVMap map_insert1; - MakeKVMap(kSampleSize_, &map_insert1); - for (auto& kv : map_insert1) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - ASSERT_EQ(2, GetColumnFamilyCount()); - - Sleep(2); - // Put more data - KVMap map_insert2; - MakeKVMap(kSampleSize_, &map_insert2); - for (auto& kv : map_insert2) { - ASSERT_OK(date_tiered_db_->Put(wopts, kv.first, kv.second)); - } - // Multiple column families for time series data - ASSERT_EQ(3, GetColumnFamilyCount()); - - // Iterator should be able to merge data from different column families - dbiter = date_tiered_db_->NewIterator(ropts); - dbiter->SeekToFirst(); - KVMap::iterator iter1 = map_insert1.begin(); - KVMap::iterator iter2 = map_insert2.begin(); - for (; iter1 != map_insert1.end() && iter2 != map_insert2.end(); - iter1++, iter2++) { - ASSERT_TRUE(dbiter->Valid()); - ASSERT_EQ(0, dbiter->value().compare(iter1->second)); - dbiter->Next(); - - ASSERT_TRUE(dbiter->Valid()); - ASSERT_EQ(0, dbiter->value().compare(iter2->second)); - dbiter->Next(); - } - delete dbiter; - - CloseDateTieredDB(); -} - -} // namespace rocksdb - -// A black-box test for the DateTieredDB around rocksdb -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -#else -#include - -int main(int /*argc*/, char** /*argv*/) { - fprintf(stderr, "SKIPPED as DateTieredDB is not supported in ROCKSDB_LITE\n"); - return 0; -} - -#endif // !ROCKSDB_LITE diff --git a/utilities/document/document_db.cc b/utilities/document/document_db.cc deleted file mode 100644 index 279e4cb4d..000000000 --- a/utilities/document/document_db.cc +++ /dev/null @@ -1,1207 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE - -#include "rocksdb/utilities/document_db.h" - -#include "rocksdb/cache.h" -#include "rocksdb/table.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/comparator.h" -#include "rocksdb/db.h" -#include "rocksdb/slice.h" -#include "rocksdb/utilities/json_document.h" -#include "util/coding.h" -#include "util/mutexlock.h" -#include "port/port.h" - -namespace rocksdb { - -// IMPORTANT NOTE: Secondary index column families should be very small and -// generally fit in memory. Assume that accessing secondary index column -// families is much faster than accessing primary index (data heap) column -// family. Accessing a key (i.e. checking for existence) from a column family in -// RocksDB is not much faster than accessing both key and value since they are -// kept together and loaded from storage together. - -namespace { -// < 0 <=> lhs < rhs -// == 0 <=> lhs == rhs -// > 0 <=> lhs == rhs -// TODO(icanadi) move this to JSONDocument? -int DocumentCompare(const JSONDocument& lhs, const JSONDocument& rhs) { - assert(lhs.IsObject() == false && rhs.IsObject() == false && - lhs.type() == rhs.type()); - - switch (lhs.type()) { - case JSONDocument::kNull: - return 0; - case JSONDocument::kBool: - return static_cast(lhs.GetBool()) - static_cast(rhs.GetBool()); - case JSONDocument::kDouble: { - double res = lhs.GetDouble() - rhs.GetDouble(); - return res == 0.0 ? 0 : (res < 0.0 ? -1 : 1); - } - case JSONDocument::kInt64: { - int64_t res = lhs.GetInt64() - rhs.GetInt64(); - return res == 0 ? 0 : (res < 0 ? -1 : 1); - } - case JSONDocument::kString: - return Slice(lhs.GetString()).compare(Slice(rhs.GetString())); - default: - assert(false); - } - return 0; -} -} // namespace - -class Filter { - public: - // returns nullptr on parse failure - static Filter* ParseFilter(const JSONDocument& filter); - - struct Interval { - JSONDocument upper_bound; - JSONDocument lower_bound; - bool upper_inclusive; - bool lower_inclusive; - Interval() - : upper_bound(), - lower_bound(), - upper_inclusive(false), - lower_inclusive(false) {} - Interval(const JSONDocument& ub, const JSONDocument& lb, bool ui, bool li) - : upper_bound(ub), - lower_bound(lb), - upper_inclusive(ui), - lower_inclusive(li) { - } - - void UpdateUpperBound(const JSONDocument& ub, bool inclusive); - void UpdateLowerBound(const JSONDocument& lb, bool inclusive); - }; - - bool SatisfiesFilter(const JSONDocument& document) const; - const Interval* GetInterval(const std::string& field) const; - - private: - explicit Filter(const JSONDocument& filter) : filter_(filter.Copy()) { - assert(filter_.IsOwner()); - } - - // copied from the parameter - const JSONDocument filter_; - // constant after construction - std::unordered_map intervals_; -}; - -void Filter::Interval::UpdateUpperBound(const JSONDocument& ub, - bool inclusive) { - bool update = upper_bound.IsNull(); - if (!update) { - int cmp = DocumentCompare(upper_bound, ub); - update = (cmp > 0) || (cmp == 0 && !inclusive); - } - if (update) { - upper_bound = ub; - upper_inclusive = inclusive; - } -} - -void Filter::Interval::UpdateLowerBound(const JSONDocument& lb, - bool inclusive) { - bool update = lower_bound.IsNull(); - if (!update) { - int cmp = DocumentCompare(lower_bound, lb); - update = (cmp < 0) || (cmp == 0 && !inclusive); - } - if (update) { - lower_bound = lb; - lower_inclusive = inclusive; - } -} - -Filter* Filter::ParseFilter(const JSONDocument& filter) { - if (filter.IsObject() == false) { - return nullptr; - } - - std::unique_ptr f(new Filter(filter)); - - for (const auto& items : f->filter_.Items()) { - if (items.first.size() && items.first[0] == '$') { - // fields starting with '$' are commands - continue; - } - assert(f->intervals_.find(items.first) == f->intervals_.end()); - if (items.second.IsObject()) { - if (items.second.Count() == 0) { - // uhm...? - return nullptr; - } - Interval interval; - for (const auto& condition : items.second.Items()) { - if (condition.second.IsObject() || condition.second.IsArray()) { - // comparison operators not defined on objects. invalid array - return nullptr; - } - // comparison operators: - if (condition.first == "$gt") { - interval.UpdateLowerBound(condition.second, false); - } else if (condition.first == "$gte") { - interval.UpdateLowerBound(condition.second, true); - } else if (condition.first == "$lt") { - interval.UpdateUpperBound(condition.second, false); - } else if (condition.first == "$lte") { - interval.UpdateUpperBound(condition.second, true); - } else { - // TODO(icanadi) more logical operators - return nullptr; - } - } - f->intervals_.insert({items.first, interval}); - } else { - // equality - f->intervals_.insert( - {items.first, Interval(items.second, - items.second, true, true)}); - } - } - - return f.release(); -} - -const Filter::Interval* Filter::GetInterval(const std::string& field) const { - auto itr = intervals_.find(field); - if (itr == intervals_.end()) { - return nullptr; - } - // we can do that since intervals_ is constant after construction - return &itr->second; -} - -bool Filter::SatisfiesFilter(const JSONDocument& document) const { - for (const auto& interval : intervals_) { - if (!document.Contains(interval.first)) { - // doesn't have the value, doesn't satisfy the filter - // (we don't support null queries yet) - return false; - } - auto value = document[interval.first]; - if (!interval.second.upper_bound.IsNull()) { - if (value.type() != interval.second.upper_bound.type()) { - // no cross-type queries yet - // TODO(icanadi) do this at least for numbers! - return false; - } - int cmp = DocumentCompare(interval.second.upper_bound, value); - if (cmp < 0 || (cmp == 0 && interval.second.upper_inclusive == false)) { - // bigger (or equal) than upper bound - return false; - } - } - if (!interval.second.lower_bound.IsNull()) { - if (value.type() != interval.second.lower_bound.type()) { - // no cross-type queries yet - return false; - } - int cmp = DocumentCompare(interval.second.lower_bound, value); - if (cmp > 0 || (cmp == 0 && interval.second.lower_inclusive == false)) { - // smaller (or equal) than the lower bound - return false; - } - } - } - return true; -} - -class Index { - public: - Index() = default; - virtual ~Index() {} - - virtual const char* Name() const = 0; - - // Functions that are executed during write time - // --------------------------------------------- - // GetIndexKey() generates a key that will be used to index document and - // returns the key though the second std::string* parameter - virtual void GetIndexKey(const JSONDocument& document, - std::string* key) const = 0; - // Keys generated with GetIndexKey() will be compared using this comparator. - // It should be assumed that there will be a suffix added to the index key - // according to IndexKey implementation - virtual const Comparator* GetComparator() const = 0; - - // Functions that are executed during query time - // --------------------------------------------- - enum Direction { - kForwards, - kBackwards, - }; - // Returns true if this index can provide some optimization for satisfying - // filter. False otherwise - virtual bool UsefulIndex(const Filter& filter) const = 0; - // For every filter (assuming UsefulIndex()) there is a continuous interval of - // keys in the index that satisfy the index conditions. That interval can be - // three things: - // * [A, B] - // * [A, infinity> - // * <-infinity, B] - // - // Query engine that uses this Index for optimization will access the interval - // by first calling Position() and then iterating in the Direction (returned - // by Position()) while ShouldContinueLooking() is true. - // * For [A, B] interval Position() will Seek() to A and return kForwards. - // ShouldContinueLooking() will be true until the iterator value gets beyond B - // -- then it will return false - // * For [A, infinity> Position() will Seek() to A and return kForwards. - // ShouldContinueLooking() will always return true - // * For <-infinity, B] Position() will Seek() to B and return kBackwards. - // ShouldContinueLooking() will always return true (given that iterator is - // advanced by calling Prev()) - virtual Direction Position(const Filter& filter, - Iterator* iterator) const = 0; - virtual bool ShouldContinueLooking(const Filter& filter, - const Slice& secondary_key, - Direction direction) const = 0; - - // Static function that is executed when Index is created - // --------------------------------------------- - // Create Index from user-supplied description. Return nullptr on parse - // failure. - static Index* CreateIndexFromDescription(const JSONDocument& description, - const std::string& name); - - private: - // No copying allowed - Index(const Index&); - void operator=(const Index&); -}; - -// Encoding helper function -namespace { -std::string InternalSecondaryIndexName(const std::string& user_name) { - return "index_" + user_name; -} - -// Don't change these, they are persisted in secondary indexes -enum JSONPrimitivesEncoding : char { - kNull = 0x1, - kBool = 0x2, - kDouble = 0x3, - kInt64 = 0x4, - kString = 0x5, -}; - -// encodes simple JSON members (meaning string, integer, etc) -// the end result of this will be lexicographically compared to each other -bool EncodeJSONPrimitive(const JSONDocument& json, std::string* dst) { - // TODO(icanadi) revise this at some point, have a custom comparator - switch (json.type()) { - case JSONDocument::kNull: - dst->push_back(kNull); - break; - case JSONDocument::kBool: - dst->push_back(kBool); - dst->push_back(static_cast(json.GetBool())); - break; - case JSONDocument::kDouble: - dst->push_back(kDouble); - PutFixed64(dst, static_cast(json.GetDouble())); - break; - case JSONDocument::kInt64: - dst->push_back(kInt64); - { - auto val = json.GetInt64(); - dst->push_back((val < 0) ? '0' : '1'); - PutFixed64(dst, static_cast(val)); - } - break; - case JSONDocument::kString: - dst->push_back(kString); - dst->append(json.GetString()); - break; - default: - return false; - } - return true; -} - -} // namespace - -// format of the secondary key is: -// -class IndexKey { - public: - IndexKey() : ok_(false) {} - explicit IndexKey(const Slice& slice) { - if (slice.size() < sizeof(uint32_t)) { - ok_ = false; - return; - } - uint32_t primary_key_offset = - DecodeFixed32(slice.data() + slice.size() - sizeof(uint32_t)); - if (primary_key_offset >= slice.size() - sizeof(uint32_t)) { - ok_ = false; - return; - } - parts_[0] = Slice(slice.data(), primary_key_offset); - parts_[1] = Slice(slice.data() + primary_key_offset, - slice.size() - primary_key_offset - sizeof(uint32_t)); - ok_ = true; - } - IndexKey(const Slice& secondary_key, const Slice& primary_key) : ok_(true) { - parts_[0] = secondary_key; - parts_[1] = primary_key; - } - - SliceParts GetSliceParts() { - uint32_t primary_key_offset = static_cast(parts_[0].size()); - EncodeFixed32(primary_key_offset_buf_, primary_key_offset); - parts_[2] = Slice(primary_key_offset_buf_, sizeof(uint32_t)); - return SliceParts(parts_, 3); - } - - const Slice& GetPrimaryKey() const { return parts_[1]; } - const Slice& GetSecondaryKey() const { return parts_[0]; } - - bool ok() const { return ok_; } - - private: - bool ok_; - // 0 -- secondary key - // 1 -- primary key - // 2 -- primary key offset - Slice parts_[3]; - char primary_key_offset_buf_[sizeof(uint32_t)]; -}; - -class SimpleSortedIndex : public Index { - public: - SimpleSortedIndex(const std::string& field, const std::string& name) - : field_(field), name_(name) {} - - virtual const char* Name() const override { return name_.c_str(); } - - virtual void GetIndexKey(const JSONDocument& document, std::string* key) const - override { - if (!document.Contains(field_)) { - if (!EncodeJSONPrimitive(JSONDocument(JSONDocument::kNull), key)) { - assert(false); - } - } else { - if (!EncodeJSONPrimitive(document[field_], key)) { - assert(false); - } - } - } - virtual const Comparator* GetComparator() const override { - return BytewiseComparator(); - } - - virtual bool UsefulIndex(const Filter& filter) const override { - return filter.GetInterval(field_) != nullptr; - } - // REQUIRES: UsefulIndex(filter) == true - virtual Direction Position(const Filter& filter, - Iterator* iterator) const override { - auto interval = filter.GetInterval(field_); - assert(interval != nullptr); // because index is useful - Direction direction; - - const JSONDocument* limit; - if (!interval->lower_bound.IsNull()) { - limit = &(interval->lower_bound); - direction = kForwards; - } else { - limit = &(interval->upper_bound); - direction = kBackwards; - } - - std::string encoded_limit; - if (!EncodeJSONPrimitive(*limit, &encoded_limit)) { - assert(false); - } - iterator->Seek(Slice(encoded_limit)); - - return direction; - } - // REQUIRES: UsefulIndex(filter) == true -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4702) // Unreachable code -#endif - virtual bool ShouldContinueLooking( - const Filter& filter, const Slice& secondary_key, - Index::Direction direction) const override { - auto interval = filter.GetInterval(field_); - assert(interval != nullptr); // because index is useful - if (direction == kForwards) { - if (interval->upper_bound.IsNull()) { - // continue looking, no upper bound - return true; - } - std::string encoded_upper_bound; - if (!EncodeJSONPrimitive(interval->upper_bound, &encoded_upper_bound)) { - // uhm...? - // TODO(icanadi) store encoded upper and lower bounds in Filter*? - assert(false); - } - // TODO(icanadi) we need to somehow decode this and use DocumentCompare() - int compare = secondary_key.compare(Slice(encoded_upper_bound)); - // if (current key is bigger than upper bound) OR (current key is equal to - // upper bound, but inclusive is false) THEN stop looking. otherwise, - // continue - return (compare > 0 || - (compare == 0 && interval->upper_inclusive == false)) - ? false - : true; - } else { - assert(direction == kBackwards); - if (interval->lower_bound.IsNull()) { - // continue looking, no lower bound - return true; - } - std::string encoded_lower_bound; - if (!EncodeJSONPrimitive(interval->lower_bound, &encoded_lower_bound)) { - // uhm...? - // TODO(icanadi) store encoded upper and lower bounds in Filter*? - assert(false); - } - // TODO(icanadi) we need to somehow decode this and use DocumentCompare() - int compare = secondary_key.compare(Slice(encoded_lower_bound)); - // if (current key is smaller than lower bound) OR (current key is equal - // to lower bound, but inclusive is false) THEN stop looking. otherwise, - // continue - return (compare < 0 || - (compare == 0 && interval->lower_inclusive == false)) - ? false - : true; - } - - assert(false); - // this is here just so compiler doesn't complain - return false; - } -#if defined(_MSC_VER) -#pragma warning(pop) -#endif - private: - std::string field_; - std::string name_; -}; - -Index* Index::CreateIndexFromDescription(const JSONDocument& description, - const std::string& name) { - if (!description.IsObject() || description.Count() != 1) { - // not supported yet - return nullptr; - } - const auto& field = *description.Items().begin(); - if (field.second.IsInt64() == false || field.second.GetInt64() != 1) { - // not supported yet - return nullptr; - } - return new SimpleSortedIndex(field.first, name); -} - -class CursorWithFilterIndexed : public Cursor { - public: - CursorWithFilterIndexed(Iterator* primary_index_iter, - Iterator* secondary_index_iter, const Index* index, - const Filter* filter) - : primary_index_iter_(primary_index_iter), - secondary_index_iter_(secondary_index_iter), - index_(index), - filter_(filter), - valid_(true), - current_json_document_(nullptr) { - assert(filter_.get() != nullptr); - direction_ = index->Position(*filter_.get(), secondary_index_iter_.get()); - UpdateIndexKey(); - AdvanceUntilSatisfies(); - } - - virtual bool Valid() const override { - return valid_ && secondary_index_iter_->Valid(); - } - virtual void Next() override { - assert(Valid()); - Advance(); - AdvanceUntilSatisfies(); - } - // temporary object. copy it if you want to use it - virtual const JSONDocument& document() const override { - assert(Valid()); - return *current_json_document_; - } - virtual Status status() const override { - if (!status_.ok()) { - return status_; - } - if (!primary_index_iter_->status().ok()) { - return primary_index_iter_->status(); - } - return secondary_index_iter_->status(); - } - - private: - void Advance() { - if (direction_ == Index::kForwards) { - secondary_index_iter_->Next(); - } else { - secondary_index_iter_->Prev(); - } - UpdateIndexKey(); - } - void AdvanceUntilSatisfies() { - bool found = false; - while (secondary_index_iter_->Valid() && - index_->ShouldContinueLooking( - *filter_.get(), index_key_.GetSecondaryKey(), direction_)) { - if (!UpdateJSONDocument()) { - // corruption happened - return; - } - if (filter_->SatisfiesFilter(*current_json_document_)) { - // we found satisfied! - found = true; - break; - } else { - // doesn't satisfy :( - Advance(); - } - } - if (!found) { - valid_ = false; - } - } - - bool UpdateJSONDocument() { - assert(secondary_index_iter_->Valid()); - primary_index_iter_->Seek(index_key_.GetPrimaryKey()); - if (!primary_index_iter_->Valid()) { - status_ = Status::Corruption( - "Inconsistency between primary and secondary index"); - valid_ = false; - return false; - } - current_json_document_.reset( - JSONDocument::Deserialize(primary_index_iter_->value())); - assert(current_json_document_->IsOwner()); - if (current_json_document_.get() == nullptr) { - status_ = Status::Corruption("JSON deserialization failed"); - valid_ = false; - return false; - } - return true; - } - void UpdateIndexKey() { - if (secondary_index_iter_->Valid()) { - index_key_ = IndexKey(secondary_index_iter_->key()); - if (!index_key_.ok()) { - status_ = Status::Corruption("Invalid index key"); - valid_ = false; - } - } - } - std::unique_ptr primary_index_iter_; - std::unique_ptr secondary_index_iter_; - // we don't own index_ - const Index* index_; - Index::Direction direction_; - std::unique_ptr filter_; - bool valid_; - IndexKey index_key_; - std::unique_ptr current_json_document_; - Status status_; -}; - -class CursorFromIterator : public Cursor { - public: - explicit CursorFromIterator(Iterator* iter) - : iter_(iter), current_json_document_(nullptr) { - iter_->SeekToFirst(); - UpdateCurrentJSON(); - } - - virtual bool Valid() const override { return status_.ok() && iter_->Valid(); } - virtual void Next() override { - iter_->Next(); - UpdateCurrentJSON(); - } - virtual const JSONDocument& document() const override { - assert(Valid()); - return *current_json_document_; - }; - virtual Status status() const override { - if (!status_.ok()) { - return status_; - } - return iter_->status(); - } - - // not part of public Cursor interface - Slice key() const { return iter_->key(); } - - private: - void UpdateCurrentJSON() { - if (Valid()) { - current_json_document_.reset(JSONDocument::Deserialize(iter_->value())); - if (current_json_document_.get() == nullptr) { - status_ = Status::Corruption("JSON deserialization failed"); - } - } - } - - Status status_; - std::unique_ptr iter_; - std::unique_ptr current_json_document_; -}; - -class CursorWithFilter : public Cursor { - public: - CursorWithFilter(Cursor* base_cursor, const Filter* filter) - : base_cursor_(base_cursor), filter_(filter) { - assert(filter_.get() != nullptr); - SeekToNextSatisfies(); - } - virtual bool Valid() const override { return base_cursor_->Valid(); } - virtual void Next() override { - assert(Valid()); - base_cursor_->Next(); - SeekToNextSatisfies(); - } - virtual const JSONDocument& document() const override { - assert(Valid()); - return base_cursor_->document(); - } - virtual Status status() const override { return base_cursor_->status(); } - - private: - void SeekToNextSatisfies() { - for (; base_cursor_->Valid(); base_cursor_->Next()) { - if (filter_->SatisfiesFilter(base_cursor_->document())) { - break; - } - } - } - std::unique_ptr base_cursor_; - std::unique_ptr filter_; -}; - -class CursorError : public Cursor { - public: - explicit CursorError(Status s) : s_(s) { assert(!s.ok()); } - virtual Status status() const override { return s_; } - virtual bool Valid() const override { return false; } - virtual void Next() override {} - virtual const JSONDocument& document() const override { - assert(false); - // compiler complains otherwise - return trash_; - } - - private: - Status s_; - JSONDocument trash_; -}; - -class DocumentDBImpl : public DocumentDB { - public: - DocumentDBImpl( - DB* db, ColumnFamilyHandle* primary_key_column_family, - const std::vector>& indexes, - const Options& rocksdb_options) - : DocumentDB(db), - primary_key_column_family_(primary_key_column_family), - rocksdb_options_(rocksdb_options) { - for (const auto& index : indexes) { - name_to_index_.insert( - {index.first->Name(), IndexColumnFamily(index.first, index.second)}); - } - } - - ~DocumentDBImpl() { - for (auto& iter : name_to_index_) { - delete iter.second.index; - delete iter.second.column_family; - } - delete primary_key_column_family_; - } - - virtual Status CreateIndex(const WriteOptions& write_options, - const IndexDescriptor& index) override { - auto index_obj = - Index::CreateIndexFromDescription(*index.description, index.name); - if (index_obj == nullptr) { - return Status::InvalidArgument("Failed parsing index description"); - } - - ColumnFamilyHandle* cf_handle; - Status s = - CreateColumnFamily(ColumnFamilyOptions(rocksdb_options_), - InternalSecondaryIndexName(index.name), &cf_handle); - if (!s.ok()) { - delete index_obj; - return s; - } - - MutexLock l(&write_mutex_); - - std::unique_ptr cursor(new CursorFromIterator( - DocumentDB::NewIterator(ReadOptions(), primary_key_column_family_))); - - WriteBatch batch; - for (; cursor->Valid(); cursor->Next()) { - std::string secondary_index_key; - index_obj->GetIndexKey(cursor->document(), &secondary_index_key); - IndexKey index_key(Slice(secondary_index_key), cursor->key()); - batch.Put(cf_handle, index_key.GetSliceParts(), SliceParts()); - } - - if (!cursor->status().ok()) { - delete index_obj; - return cursor->status(); - } - - { - MutexLock l_nti(&name_to_index_mutex_); - name_to_index_.insert( - {index.name, IndexColumnFamily(index_obj, cf_handle)}); - } - - return DocumentDB::Write(write_options, &batch); - } - - virtual Status DropIndex(const std::string& name) override { - MutexLock l(&write_mutex_); - - auto index_iter = name_to_index_.find(name); - if (index_iter == name_to_index_.end()) { - return Status::InvalidArgument("No such index"); - } - - Status s = DropColumnFamily(index_iter->second.column_family); - if (!s.ok()) { - return s; - } - - delete index_iter->second.index; - delete index_iter->second.column_family; - - // remove from name_to_index_ - { - MutexLock l_nti(&name_to_index_mutex_); - name_to_index_.erase(index_iter); - } - - return Status::OK(); - } - - virtual Status Insert(const WriteOptions& options, - const JSONDocument& document) override { - WriteBatch batch; - - if (!document.IsObject()) { - return Status::InvalidArgument("Document not an object"); - } - if (!document.Contains(kPrimaryKey)) { - return Status::InvalidArgument("No primary key"); - } - auto primary_key = document[kPrimaryKey]; - if (primary_key.IsNull() || - (!primary_key.IsString() && !primary_key.IsInt64())) { - return Status::InvalidArgument( - "Primary key format error"); - } - std::string encoded_document; - document.Serialize(&encoded_document); - std::string primary_key_encoded; - if (!EncodeJSONPrimitive(primary_key, &primary_key_encoded)) { - // previous call should be guaranteed to pass because of all primary_key - // conditions checked before - assert(false); - } - Slice primary_key_slice(primary_key_encoded); - - // Lock now, since we're starting DB operations - MutexLock l(&write_mutex_); - // check if there is already a document with the same primary key - PinnableSlice value; - Status s = DocumentDB::Get(ReadOptions(), primary_key_column_family_, - primary_key_slice, &value); - if (!s.IsNotFound()) { - return s.ok() ? Status::InvalidArgument("Duplicate primary key!") : s; - } - - batch.Put(primary_key_column_family_, primary_key_slice, encoded_document); - - for (const auto& iter : name_to_index_) { - std::string secondary_index_key; - iter.second.index->GetIndexKey(document, &secondary_index_key); - IndexKey index_key(Slice(secondary_index_key), primary_key_slice); - batch.Put(iter.second.column_family, index_key.GetSliceParts(), - SliceParts()); - } - - return DocumentDB::Write(options, &batch); - } - - virtual Status Remove(const ReadOptions& read_options, - const WriteOptions& write_options, - const JSONDocument& query) override { - MutexLock l(&write_mutex_); - std::unique_ptr cursor( - ConstructFilterCursor(read_options, nullptr, query)); - - WriteBatch batch; - for (; cursor->status().ok() && cursor->Valid(); cursor->Next()) { - const auto& document = cursor->document(); - if (!document.IsObject()) { - return Status::Corruption("Document corruption"); - } - if (!document.Contains(kPrimaryKey)) { - return Status::Corruption("Document corruption"); - } - auto primary_key = document[kPrimaryKey]; - if (primary_key.IsNull() || - (!primary_key.IsString() && !primary_key.IsInt64())) { - return Status::Corruption("Document corruption"); - } - - // TODO(icanadi) Instead of doing this, just get primary key encoding from - // cursor, as it already has this information - std::string primary_key_encoded; - if (!EncodeJSONPrimitive(primary_key, &primary_key_encoded)) { - // previous call should be guaranteed to pass because of all primary_key - // conditions checked before - assert(false); - } - Slice primary_key_slice(primary_key_encoded); - batch.Delete(primary_key_column_family_, primary_key_slice); - - for (const auto& iter : name_to_index_) { - std::string secondary_index_key; - iter.second.index->GetIndexKey(document, &secondary_index_key); - IndexKey index_key(Slice(secondary_index_key), primary_key_slice); - batch.Delete(iter.second.column_family, index_key.GetSliceParts()); - } - } - - if (!cursor->status().ok()) { - return cursor->status(); - } - - return DocumentDB::Write(write_options, &batch); - } - - virtual Status Update(const ReadOptions& read_options, - const WriteOptions& write_options, - const JSONDocument& filter, - const JSONDocument& updates) override { - MutexLock l(&write_mutex_); - std::unique_ptr cursor( - ConstructFilterCursor(read_options, nullptr, filter)); - - if (!updates.IsObject()) { - return Status::Corruption("Bad update document format"); - } - WriteBatch batch; - for (; cursor->status().ok() && cursor->Valid(); cursor->Next()) { - const auto& old_document = cursor->document(); - JSONDocument new_document(old_document); - if (!new_document.IsObject()) { - return Status::Corruption("Document corruption"); - } - // TODO(icanadi) Make this nicer, something like class Filter - for (const auto& update : updates.Items()) { - if (update.first == "$set") { - JSONDocumentBuilder builder; - bool res __attribute__((__unused__)) = builder.WriteStartObject(); - assert(res); - for (const auto& itr : update.second.Items()) { - if (itr.first == kPrimaryKey) { - return Status::NotSupported("Please don't change primary key"); - } - res = builder.WriteKeyValue(itr.first, itr.second); - assert(res); - } - res = builder.WriteEndObject(); - assert(res); - JSONDocument update_document = builder.GetJSONDocument(); - builder.Reset(); - res = builder.WriteStartObject(); - assert(res); - for (const auto& itr : new_document.Items()) { - if (update_document.Contains(itr.first)) { - res = builder.WriteKeyValue(itr.first, - update_document[itr.first]); - } else { - res = builder.WriteKeyValue(itr.first, new_document[itr.first]); - } - assert(res); - } - res = builder.WriteEndObject(); - assert(res); - new_document = builder.GetJSONDocument(); - assert(new_document.IsOwner()); - } else { - // TODO(icanadi) more commands - return Status::InvalidArgument("Can't understand update command"); - } - } - - // TODO(icanadi) reuse some of this code - if (!new_document.Contains(kPrimaryKey)) { - return Status::Corruption("Corrupted document -- primary key missing"); - } - auto primary_key = new_document[kPrimaryKey]; - if (primary_key.IsNull() || - (!primary_key.IsString() && !primary_key.IsInt64())) { - // This will happen when document on storage doesn't have primary key, - // since we don't support any update operations on primary key. That's - // why this is corruption error - return Status::Corruption("Corrupted document -- primary key missing"); - } - std::string encoded_document; - new_document.Serialize(&encoded_document); - std::string primary_key_encoded; - if (!EncodeJSONPrimitive(primary_key, &primary_key_encoded)) { - // previous call should be guaranteed to pass because of all primary_key - // conditions checked before - assert(false); - } - Slice primary_key_slice(primary_key_encoded); - batch.Put(primary_key_column_family_, primary_key_slice, - encoded_document); - - for (const auto& iter : name_to_index_) { - std::string old_key, new_key; - iter.second.index->GetIndexKey(old_document, &old_key); - iter.second.index->GetIndexKey(new_document, &new_key); - if (old_key == new_key) { - // don't need to update this secondary index - continue; - } - - IndexKey old_index_key(Slice(old_key), primary_key_slice); - IndexKey new_index_key(Slice(new_key), primary_key_slice); - - batch.Delete(iter.second.column_family, old_index_key.GetSliceParts()); - batch.Put(iter.second.column_family, new_index_key.GetSliceParts(), - SliceParts()); - } - } - - if (!cursor->status().ok()) { - return cursor->status(); - } - - return DocumentDB::Write(write_options, &batch); - } - - virtual Cursor* Query(const ReadOptions& read_options, - const JSONDocument& query) override { - Cursor* cursor = nullptr; - - if (!query.IsArray()) { - return new CursorError( - Status::InvalidArgument("Query has to be an array")); - } - - // TODO(icanadi) support index "_id" - for (size_t i = 0; i < query.Count(); ++i) { - const auto& command_doc = query[i]; - if (command_doc.Count() != 1) { - // there can be only one key-value pair in each of array elements. - // key is the command and value are the params - delete cursor; - return new CursorError(Status::InvalidArgument("Invalid query")); - } - const auto& command = *command_doc.Items().begin(); - - if (command.first == "$filter") { - cursor = ConstructFilterCursor(read_options, cursor, command.second); - } else { - // only filter is supported for now - delete cursor; - return new CursorError(Status::InvalidArgument("Invalid query")); - } - } - - if (cursor == nullptr) { - cursor = new CursorFromIterator( - DocumentDB::NewIterator(read_options, primary_key_column_family_)); - } - - return cursor; - } - - // RocksDB functions - using DB::Get; - virtual Status Get(const ReadOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Slice& /*key*/, PinnableSlice* /*value*/) override { - return Status::NotSupported(""); - } - virtual Status Get(const ReadOptions& /*options*/, const Slice& /*key*/, - std::string* /*value*/) override { - return Status::NotSupported(""); - } - virtual Status Write(const WriteOptions& /*options*/, - WriteBatch* /*updates*/) override { - return Status::NotSupported(""); - } - virtual Iterator* NewIterator( - const ReadOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/) override { - return nullptr; - } - virtual Iterator* NewIterator(const ReadOptions& /*options*/) override { - return nullptr; - } - - private: -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4702) // unreachable code -#endif - Cursor* ConstructFilterCursor(ReadOptions read_options, Cursor* cursor, - const JSONDocument& query) { - std::unique_ptr filter(Filter::ParseFilter(query)); - if (filter.get() == nullptr) { - return new CursorError(Status::InvalidArgument("Invalid query")); - } - - IndexColumnFamily tmp_storage(nullptr, nullptr); - - if (cursor == nullptr) { - IndexColumnFamily* index_column_family = nullptr; - if (query.Contains("$index") && query["$index"].IsString()) { - { - auto index_name = query["$index"]; - MutexLock l(&name_to_index_mutex_); - auto index_iter = name_to_index_.find(index_name.GetString()); - if (index_iter != name_to_index_.end()) { - tmp_storage = index_iter->second; - index_column_family = &tmp_storage; - } else { - return new CursorError( - Status::InvalidArgument("Index does not exist")); - } - } - } - - if (index_column_family != nullptr && - index_column_family->index->UsefulIndex(*filter.get())) { - std::vector iterators; - Status s = DocumentDB::NewIterators( - read_options, - {primary_key_column_family_, index_column_family->column_family}, - &iterators); - if (!s.ok()) { - delete cursor; - return new CursorError(s); - } - assert(iterators.size() == 2); - return new CursorWithFilterIndexed(iterators[0], iterators[1], - index_column_family->index, - filter.release()); - } else { - return new CursorWithFilter( - new CursorFromIterator(DocumentDB::NewIterator( - read_options, primary_key_column_family_)), - filter.release()); - } - } else { - return new CursorWithFilter(cursor, filter.release()); - } - assert(false); - return nullptr; - } -#if defined(_MSC_VER) -#pragma warning(pop) -#endif - - // currently, we lock and serialize all writes to rocksdb. reads are not - // locked and always get consistent view of the database. we should optimize - // locking in the future - port::Mutex write_mutex_; - port::Mutex name_to_index_mutex_; - const char* kPrimaryKey = "_id"; - struct IndexColumnFamily { - IndexColumnFamily(Index* _index, ColumnFamilyHandle* _column_family) - : index(_index), column_family(_column_family) {} - Index* index; - ColumnFamilyHandle* column_family; - }; - - - // name_to_index_ protected: - // 1) when writing -- 1. lock write_mutex_, 2. lock name_to_index_mutex_ - // 2) when reading -- lock name_to_index_mutex_ OR write_mutex_ - std::unordered_map name_to_index_; - ColumnFamilyHandle* primary_key_column_family_; - Options rocksdb_options_; -}; - -namespace { -Options GetRocksDBOptionsFromOptions(const DocumentDBOptions& options) { - Options rocksdb_options; - rocksdb_options.max_background_compactions = options.background_threads - 1; - rocksdb_options.max_background_flushes = 1; - rocksdb_options.write_buffer_size = static_cast(options.memtable_size); - rocksdb_options.max_write_buffer_number = 6; - BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(static_cast(options.cache_size)); - rocksdb_options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - return rocksdb_options; -} -} // namespace - -Status DocumentDB::Open(const DocumentDBOptions& options, - const std::string& name, - const std::vector& indexes, - DocumentDB** db, bool read_only) { - Options rocksdb_options = GetRocksDBOptionsFromOptions(options); - rocksdb_options.create_if_missing = true; - - std::vector column_families; - column_families.push_back(ColumnFamilyDescriptor( - kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); - for (const auto& index : indexes) { - column_families.emplace_back(InternalSecondaryIndexName(index.name), - ColumnFamilyOptions(rocksdb_options)); - } - std::vector handles; - DB* base_db; - Status s; - if (read_only) { - s = DB::OpenForReadOnly(DBOptions(rocksdb_options), name, column_families, - &handles, &base_db); - } else { - s = DB::Open(DBOptions(rocksdb_options), name, column_families, &handles, - &base_db); - } - if (!s.ok()) { - return s; - } - - std::vector> index_cf(indexes.size()); - assert(handles.size() == indexes.size() + 1); - for (size_t i = 0; i < indexes.size(); ++i) { - auto index = Index::CreateIndexFromDescription(*indexes[i].description, - indexes[i].name); - index_cf[i] = {index, handles[i + 1]}; - } - *db = new DocumentDBImpl(base_db, handles[0], index_cf, rocksdb_options); - return Status::OK(); -} - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/document/document_db_test.cc b/utilities/document/document_db_test.cc deleted file mode 100644 index 3ee560db1..000000000 --- a/utilities/document/document_db_test.cc +++ /dev/null @@ -1,338 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE - -#include - -#include "rocksdb/utilities/json_document.h" -#include "rocksdb/utilities/document_db.h" - -#include "util/testharness.h" -#include "util/testutil.h" - -namespace rocksdb { - -class DocumentDBTest : public testing::Test { - public: - DocumentDBTest() { - dbname_ = test::PerThreadDBPath("document_db_test"); - DestroyDB(dbname_, Options()); - } - ~DocumentDBTest() { - delete db_; - DestroyDB(dbname_, Options()); - } - - void AssertCursorIDs(Cursor* cursor, std::vector expected) { - std::vector got; - while (cursor->Valid()) { - ASSERT_TRUE(cursor->Valid()); - ASSERT_TRUE(cursor->document().Contains("_id")); - got.push_back(cursor->document()["_id"].GetInt64()); - cursor->Next(); - } - std::sort(expected.begin(), expected.end()); - std::sort(got.begin(), got.end()); - ASSERT_TRUE(got == expected); - } - - // converts ' to ", so that we don't have to escape " all over the place - std::string ConvertQuotes(const std::string& input) { - std::string output; - for (auto x : input) { - if (x == '\'') { - output.push_back('\"'); - } else { - output.push_back(x); - } - } - return output; - } - - void CreateIndexes(std::vector indexes) { - for (auto i : indexes) { - ASSERT_OK(db_->CreateIndex(WriteOptions(), i)); - } - } - - JSONDocument* Parse(const std::string& doc) { - return JSONDocument::ParseJSON(ConvertQuotes(doc).c_str()); - } - - std::string dbname_; - DocumentDB* db_; -}; - -TEST_F(DocumentDBTest, SimpleQueryTest) { - DocumentDBOptions options; - DocumentDB::IndexDescriptor index; - index.description = Parse("{\"name\": 1}"); - index.name = "name_index"; - - ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_)); - CreateIndexes({index}); - delete db_; - db_ = nullptr; - // now there is index present - ASSERT_OK(DocumentDB::Open(options, dbname_, {index}, &db_)); - assert(db_ != nullptr); - delete index.description; - - std::vector json_objects = { - "{\"_id\': 1, \"name\": \"One\"}", "{\"_id\": 2, \"name\": \"Two\"}", - "{\"_id\": 3, \"name\": \"Three\"}", "{\"_id\": 4, \"name\": \"Four\"}"}; - - for (auto& json : json_objects) { - std::unique_ptr document(Parse(json)); - ASSERT_TRUE(document.get() != nullptr); - ASSERT_OK(db_->Insert(WriteOptions(), *document)); - } - - // inserting a document with existing primary key should return failure - { - std::unique_ptr document(Parse(json_objects[0])); - ASSERT_TRUE(document.get() != nullptr); - Status s = db_->Insert(WriteOptions(), *document); - ASSERT_TRUE(s.IsInvalidArgument()); - } - - // find equal to "Two" - { - std::unique_ptr query( - Parse("[{'$filter': {'name': 'Two', '$index': 'name_index'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {2}); - } - - // find less than "Three" - { - std::unique_ptr query(Parse( - "[{'$filter': {'name': {'$lt': 'Three'}, '$index': " - "'name_index'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - - AssertCursorIDs(cursor.get(), {1, 4}); - } - - // find less than "Three" without index - { - std::unique_ptr query( - Parse("[{'$filter': {'name': {'$lt': 'Three'} }}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {1, 4}); - } - - // remove less or equal to "Three" - { - std::unique_ptr query( - Parse("{'name': {'$lte': 'Three'}, '$index': 'name_index'}")); - ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query)); - } - - // find all -- only "Two" left, everything else should be deleted - { - std::unique_ptr query(Parse("[]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {2}); - } -} - -TEST_F(DocumentDBTest, ComplexQueryTest) { - DocumentDBOptions options; - DocumentDB::IndexDescriptor priority_index; - priority_index.description = Parse("{'priority': 1}"); - priority_index.name = "priority"; - DocumentDB::IndexDescriptor job_name_index; - job_name_index.description = Parse("{'job_name': 1}"); - job_name_index.name = "job_name"; - DocumentDB::IndexDescriptor progress_index; - progress_index.description = Parse("{'progress': 1}"); - progress_index.name = "progress"; - - ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_)); - CreateIndexes({priority_index, progress_index}); - delete priority_index.description; - delete progress_index.description; - - std::vector json_objects = { - "{'_id': 1, 'job_name': 'play', 'priority': 10, 'progress': 14.2}", - "{'_id': 2, 'job_name': 'white', 'priority': 2, 'progress': 45.1}", - "{'_id': 3, 'job_name': 'straw', 'priority': 5, 'progress': 83.2}", - "{'_id': 4, 'job_name': 'temporary', 'priority': 3, 'progress': 14.9}", - "{'_id': 5, 'job_name': 'white', 'priority': 4, 'progress': 44.2}", - "{'_id': 6, 'job_name': 'tea', 'priority': 1, 'progress': 12.4}", - "{'_id': 7, 'job_name': 'delete', 'priority': 2, 'progress': 77.54}", - "{'_id': 8, 'job_name': 'rock', 'priority': 3, 'progress': 93.24}", - "{'_id': 9, 'job_name': 'steady', 'priority': 3, 'progress': 9.1}", - "{'_id': 10, 'job_name': 'white', 'priority': 1, 'progress': 61.4}", - "{'_id': 11, 'job_name': 'who', 'priority': 4, 'progress': 39.41}", - "{'_id': 12, 'job_name': 'who', 'priority': -1, 'progress': 39.42}", - "{'_id': 13, 'job_name': 'who', 'priority': -2, 'progress': 39.42}", }; - - // add index on the fly! - CreateIndexes({job_name_index}); - delete job_name_index.description; - - for (auto& json : json_objects) { - std::unique_ptr document(Parse(json)); - ASSERT_TRUE(document != nullptr); - ASSERT_OK(db_->Insert(WriteOptions(), *document)); - } - - // 2 < priority < 4 AND progress > 10.0, index priority - { - std::unique_ptr query(Parse( - "[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': " - "10.0}, '$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {4, 8}); - } - - // -1 <= priority <= 1, index priority - { - std::unique_ptr query(Parse( - "[{'$filter': {'priority': {'$lte': 1, '$gte': -1}," - " '$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {6, 10, 12}); - } - - // 2 < priority < 4 AND progress > 10.0, index progress - { - std::unique_ptr query(Parse( - "[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': " - "10.0}, '$index': 'progress'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {4, 8}); - } - - // job_name == 'white' AND priority >= 2, index job_name - { - std::unique_ptr query(Parse( - "[{'$filter': {'job_name': 'white', 'priority': {'$gte': " - "2}, '$index': 'job_name'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {2, 5}); - } - - // 35.0 <= progress < 65.5, index progress - { - std::unique_ptr query(Parse( - "[{'$filter': {'progress': {'$gt': 5.0, '$gte': 35.0, '$lt': 65.5}, " - "'$index': 'progress'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {2, 5, 10, 11, 12, 13}); - } - - // 2 < priority <= 4, index priority - { - std::unique_ptr query(Parse( - "[{'$filter': {'priority': {'$gt': 2, '$lt': 8, '$lte': 4}, " - "'$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {4, 5, 8, 9, 11}); - } - - // Delete all whose progress is bigger than 50% - { - std::unique_ptr query( - Parse("{'progress': {'$gt': 50.0}, '$index': 'progress'}")); - ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query)); - } - - // 2 < priority < 6, index priority - { - std::unique_ptr query(Parse( - "[{'$filter': {'priority': {'$gt': 2, '$lt': 6}, " - "'$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - AssertCursorIDs(cursor.get(), {4, 5, 9, 11}); - } - - // update set priority to 10 where job_name is 'white' - { - std::unique_ptr query(Parse("{'job_name': 'white'}")); - std::unique_ptr update(Parse("{'$set': {'priority': 10}}")); - ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update)); - } - - // update twice: set priority to 15 where job_name is 'white' - { - std::unique_ptr query(Parse("{'job_name': 'white'}")); - std::unique_ptr update(Parse("{'$set': {'priority': 10}," - "'$set': {'priority': 15}}")); - ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update)); - } - - // update twice: set priority to 15 and - // progress to 40 where job_name is 'white' - { - std::unique_ptr query(Parse("{'job_name': 'white'}")); - std::unique_ptr update( - Parse("{'$set': {'priority': 10, 'progress': 35}," - "'$set': {'priority': 15, 'progress': 40}}")); - ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update)); - } - - // priority < 0 - { - std::unique_ptr query( - Parse("[{'$filter': {'priority': {'$lt': 0}, '$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - ASSERT_OK(cursor->status()); - AssertCursorIDs(cursor.get(), {12, 13}); - } - - // -2 < priority < 0 - { - std::unique_ptr query( - Parse("[{'$filter': {'priority': {'$gt': -2, '$lt': 0}," - " '$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - ASSERT_OK(cursor->status()); - AssertCursorIDs(cursor.get(), {12}); - } - - // -2 <= priority < 0 - { - std::unique_ptr query( - Parse("[{'$filter': {'priority': {'$gte': -2, '$lt': 0}," - " '$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - ASSERT_OK(cursor->status()); - AssertCursorIDs(cursor.get(), {12, 13}); - } - - // 4 < priority - { - std::unique_ptr query( - Parse("[{'$filter': {'priority': {'$gt': 4}, '$index': 'priority'}}]")); - std::unique_ptr cursor(db_->Query(ReadOptions(), *query)); - ASSERT_OK(cursor->status()); - AssertCursorIDs(cursor.get(), {1, 2, 5}); - } - - Status s = db_->DropIndex("doesnt-exist"); - ASSERT_TRUE(!s.ok()); - ASSERT_OK(db_->DropIndex("priority")); -} - -} // namespace rocksdb - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -#else -#include - -int main(int /*argc*/, char** /*argv*/) { - fprintf(stderr, "SKIPPED as DocumentDB is not supported in ROCKSDB_LITE\n"); - return 0; -} - -#endif // !ROCKSDB_LITE diff --git a/utilities/document/json_document.cc b/utilities/document/json_document.cc deleted file mode 100644 index 21a4c7dbc..000000000 --- a/utilities/document/json_document.cc +++ /dev/null @@ -1,610 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -#ifndef ROCKSDB_LITE - -#include "rocksdb/utilities/json_document.h" - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -#include "third-party/fbson/FbsonDocument.h" -#include "third-party/fbson/FbsonJsonParser.h" -#include "third-party/fbson/FbsonUtil.h" -#include "util/coding.h" - -using std::placeholders::_1; - -namespace { - -size_t ObjectNumElem(const fbson::ObjectVal& objectVal) { - size_t size = 0; - for (auto keyValuePair : objectVal) { - (void)keyValuePair; - ++size; - } - return size; -} - -template -void InitJSONDocument(std::unique_ptr* data, - fbson::FbsonValue** value, - Func f) { - // TODO(stash): maybe add function to FbsonDocument to avoid creating array? - fbson::FbsonWriter writer; - bool res __attribute__((__unused__)) = writer.writeStartArray(); - assert(res); - uint32_t bytesWritten __attribute__((__unused__)); - bytesWritten = f(writer); - assert(bytesWritten != 0); - res = writer.writeEndArray(); - assert(res); - char* buf = new char[writer.getOutput()->getSize()]; - memcpy(buf, writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); - - *value = ((fbson::FbsonDocument *)buf)->getValue(); - assert((*value)->isArray()); - assert(((fbson::ArrayVal*)*value)->numElem() == 1); - *value = ((fbson::ArrayVal*)*value)->get(0); - data->reset(buf); -} - -void InitString(std::unique_ptr* data, - fbson::FbsonValue** value, - const std::string& s) { - InitJSONDocument(data, value, std::bind( - [](fbson::FbsonWriter& writer, const std::string& str) -> uint32_t { - bool res __attribute__((__unused__)) = writer.writeStartString(); - assert(res); - auto bytesWritten = writer.writeString(str.c_str(), - static_cast(str.length())); - res = writer.writeEndString(); - assert(res); - // If the string is empty, then bytesWritten == 0, and assert in - // InitJsonDocument will fail. - return bytesWritten + static_cast(str.empty()); - }, - _1, s)); -} - -bool IsNumeric(fbson::FbsonValue* value) { - return value->isInt8() || value->isInt16() || - value->isInt32() || value->isInt64(); -} - -int64_t GetInt64ValFromFbsonNumericType(fbson::FbsonValue* value) { - switch (value->type()) { - case fbson::FbsonType::T_Int8: - return reinterpret_cast(value)->val(); - case fbson::FbsonType::T_Int16: - return reinterpret_cast(value)->val(); - case fbson::FbsonType::T_Int32: - return reinterpret_cast(value)->val(); - case fbson::FbsonType::T_Int64: - return reinterpret_cast(value)->val(); - default: - assert(false); - } - return 0; -} - -bool IsComparable(fbson::FbsonValue* left, fbson::FbsonValue* right) { - if (left->type() == right->type()) { - return true; - } - if (IsNumeric(left) && IsNumeric(right)) { - return true; - } - return false; -} - -void CreateArray(std::unique_ptr* data, fbson::FbsonValue** value) { - fbson::FbsonWriter writer; - bool res __attribute__((__unused__)) = writer.writeStartArray(); - assert(res); - res = writer.writeEndArray(); - assert(res); - data->reset(new char[writer.getOutput()->getSize()]); - memcpy(data->get(), - writer.getOutput()->getBuffer(), - writer.getOutput()->getSize()); - *value = reinterpret_cast(data->get())->getValue(); -} - -void CreateObject(std::unique_ptr* data, fbson::FbsonValue** value) { - fbson::FbsonWriter writer; - bool res __attribute__((__unused__)) = writer.writeStartObject(); - assert(res); - res = writer.writeEndObject(); - assert(res); - data->reset(new char[writer.getOutput()->getSize()]); - memcpy(data->get(), - writer.getOutput()->getBuffer(), - writer.getOutput()->getSize()); - *value = reinterpret_cast(data->get())->getValue(); -} - -} // namespace - -namespace rocksdb { - - -// TODO(stash): find smth easier -JSONDocument::JSONDocument() { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeNull, _1)); -} - -JSONDocument::JSONDocument(bool b) { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeBool, _1, b)); -} - -JSONDocument::JSONDocument(double d) { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeDouble, _1, d)); -} - -JSONDocument::JSONDocument(int8_t i) { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeInt8, _1, i)); -} - -JSONDocument::JSONDocument(int16_t i) { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeInt16, _1, i)); -} - -JSONDocument::JSONDocument(int32_t i) { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeInt32, _1, i)); -} - -JSONDocument::JSONDocument(int64_t i) { - InitJSONDocument(&data_, - &value_, - std::bind(&fbson::FbsonWriter::writeInt64, _1, i)); -} - -JSONDocument::JSONDocument(const std::string& s) { - InitString(&data_, &value_, s); -} - -JSONDocument::JSONDocument(const char* s) : JSONDocument(std::string(s)) { -} - -void JSONDocument::InitFromValue(const fbson::FbsonValue* val) { - data_.reset(new char[val->numPackedBytes()]); - memcpy(data_.get(), val, val->numPackedBytes()); - value_ = reinterpret_cast(data_.get()); -} - -// Private constructor -JSONDocument::JSONDocument(fbson::FbsonValue* val, bool makeCopy) { - if (makeCopy) { - InitFromValue(val); - } else { - value_ = val; - } -} - -JSONDocument::JSONDocument(Type _type) { - // TODO(icanadi) make all of this better by using templates - switch (_type) { - case kNull: - InitJSONDocument(&data_, &value_, - std::bind(&fbson::FbsonWriter::writeNull, _1)); - break; - case kObject: - CreateObject(&data_, &value_); - break; - case kBool: - InitJSONDocument(&data_, &value_, - std::bind(&fbson::FbsonWriter::writeBool, _1, false)); - break; - case kDouble: - InitJSONDocument(&data_, &value_, - std::bind(&fbson::FbsonWriter::writeDouble, _1, 0.)); - break; - case kArray: - CreateArray(&data_, &value_); - break; - case kInt64: - InitJSONDocument(&data_, &value_, - std::bind(&fbson::FbsonWriter::writeInt64, _1, 0)); - break; - case kString: - InitString(&data_, &value_, ""); - break; - default: - assert(false); - } -} - -JSONDocument::JSONDocument(const JSONDocument& jsonDocument) { - if (jsonDocument.IsOwner()) { - InitFromValue(jsonDocument.value_); - } else { - value_ = jsonDocument.value_; - } -} - -JSONDocument::JSONDocument(JSONDocument&& jsonDocument) { - value_ = jsonDocument.value_; - data_.swap(jsonDocument.data_); -} - -JSONDocument& JSONDocument::operator=(JSONDocument jsonDocument) { - value_ = jsonDocument.value_; - data_.swap(jsonDocument.data_); - return *this; -} - -JSONDocument::Type JSONDocument::type() const { - switch (value_->type()) { - case fbson::FbsonType::T_Null: - return JSONDocument::kNull; - - case fbson::FbsonType::T_True: - case fbson::FbsonType::T_False: - return JSONDocument::kBool; - - case fbson::FbsonType::T_Int8: - case fbson::FbsonType::T_Int16: - case fbson::FbsonType::T_Int32: - case fbson::FbsonType::T_Int64: - return JSONDocument::kInt64; - - case fbson::FbsonType::T_Double: - return JSONDocument::kDouble; - - case fbson::FbsonType::T_String: - return JSONDocument::kString; - - case fbson::FbsonType::T_Object: - return JSONDocument::kObject; - - case fbson::FbsonType::T_Array: - return JSONDocument::kArray; - - case fbson::FbsonType::T_Binary: - default: - assert(false); - } - return JSONDocument::kNull; -} - -bool JSONDocument::Contains(const std::string& key) const { - assert(IsObject()); - auto objectVal = reinterpret_cast(value_); - return objectVal->find(key.c_str()) != nullptr; -} - -JSONDocument JSONDocument::operator[](const std::string& key) const { - assert(IsObject()); - auto objectVal = reinterpret_cast(value_); - auto foundValue = objectVal->find(key.c_str()); - assert(foundValue != nullptr); - // No need to save paths in const objects - JSONDocument ans(foundValue, false); - return ans; -} - -size_t JSONDocument::Count() const { - assert(IsObject() || IsArray()); - if (IsObject()) { - // TODO(stash): add to fbson? - const fbson::ObjectVal& objectVal = - *reinterpret_cast(value_); - return ObjectNumElem(objectVal); - } else if (IsArray()) { - auto arrayVal = reinterpret_cast(value_); - return arrayVal->numElem(); - } - assert(false); - return 0; -} - -JSONDocument JSONDocument::operator[](size_t i) const { - assert(IsArray()); - auto arrayVal = reinterpret_cast(value_); - auto foundValue = arrayVal->get(static_cast(i)); - JSONDocument ans(foundValue, false); - return ans; -} - -bool JSONDocument::IsNull() const { - return value_->isNull(); -} - -bool JSONDocument::IsArray() const { - return value_->isArray(); -} - -bool JSONDocument::IsBool() const { - return value_->isTrue() || value_->isFalse(); -} - -bool JSONDocument::IsDouble() const { - return value_->isDouble(); -} - -bool JSONDocument::IsInt64() const { - return value_->isInt8() || value_->isInt16() || - value_->isInt32() || value_->isInt64(); -} - -bool JSONDocument::IsObject() const { - return value_->isObject(); -} - -bool JSONDocument::IsString() const { - return value_->isString(); -} - -bool JSONDocument::GetBool() const { - assert(IsBool()); - return value_->isTrue(); -} - -double JSONDocument::GetDouble() const { - assert(IsDouble()); - return ((fbson::DoubleVal*)value_)->val(); -} - -int64_t JSONDocument::GetInt64() const { - assert(IsInt64()); - return GetInt64ValFromFbsonNumericType(value_); -} - -std::string JSONDocument::GetString() const { - assert(IsString()); - fbson::StringVal* stringVal = (fbson::StringVal*)value_; - return std::string(stringVal->getBlob(), stringVal->getBlobLen()); -} - -namespace { - -// FbsonValue can be int8, int16, int32, int64 -bool CompareNumeric(fbson::FbsonValue* left, fbson::FbsonValue* right) { - assert(IsNumeric(left) && IsNumeric(right)); - return GetInt64ValFromFbsonNumericType(left) == - GetInt64ValFromFbsonNumericType(right); -} - -bool CompareSimpleTypes(fbson::FbsonValue* left, fbson::FbsonValue* right) { - if (IsNumeric(left)) { - return CompareNumeric(left, right); - } - if (left->numPackedBytes() != right->numPackedBytes()) { - return false; - } - return memcmp(left, right, left->numPackedBytes()) == 0; -} - -bool CompareFbsonValue(fbson::FbsonValue* left, fbson::FbsonValue* right) { - if (!IsComparable(left, right)) { - return false; - } - - switch (left->type()) { - case fbson::FbsonType::T_True: - case fbson::FbsonType::T_False: - case fbson::FbsonType::T_Null: - return true; - case fbson::FbsonType::T_Int8: - case fbson::FbsonType::T_Int16: - case fbson::FbsonType::T_Int32: - case fbson::FbsonType::T_Int64: - return CompareNumeric(left, right); - case fbson::FbsonType::T_String: - case fbson::FbsonType::T_Double: - return CompareSimpleTypes(left, right); - case fbson::FbsonType::T_Object: - { - auto leftObject = reinterpret_cast(left); - auto rightObject = reinterpret_cast(right); - if (ObjectNumElem(*leftObject) != ObjectNumElem(*rightObject)) { - return false; - } - for (auto && keyValue : *leftObject) { - std::string str(keyValue.getKeyStr(), keyValue.klen()); - if (rightObject->find(str.c_str()) == nullptr) { - return false; - } - if (!CompareFbsonValue(keyValue.value(), - rightObject->find(str.c_str()))) { - return false; - } - } - return true; - } - case fbson::FbsonType::T_Array: - { - auto leftArr = reinterpret_cast(left); - auto rightArr = reinterpret_cast(right); - if (leftArr->numElem() != rightArr->numElem()) { - return false; - } - for (int i = 0; i < static_cast(leftArr->numElem()); ++i) { - if (!CompareFbsonValue(leftArr->get(i), rightArr->get(i))) { - return false; - } - } - return true; - } - default: - assert(false); - } - return false; -} - -} // namespace - -bool JSONDocument::operator==(const JSONDocument& rhs) const { - return CompareFbsonValue(value_, rhs.value_); -} - -bool JSONDocument::operator!=(const JSONDocument& rhs) const { - return !(*this == rhs); -} - -JSONDocument JSONDocument::Copy() const { - return JSONDocument(value_, true); -} - -bool JSONDocument::IsOwner() const { - return data_.get() != nullptr; -} - -std::string JSONDocument::DebugString() const { - fbson::FbsonToJson fbsonToJson; - return fbsonToJson.json(value_); -} - -JSONDocument::ItemsIteratorGenerator JSONDocument::Items() const { - assert(IsObject()); - return ItemsIteratorGenerator(*(reinterpret_cast(value_))); -} - -// TODO(icanadi) (perf) allocate objects with arena -JSONDocument* JSONDocument::ParseJSON(const char* json) { - fbson::FbsonJsonParser parser; - if (!parser.parse(json)) { - return nullptr; - } - - auto fbsonVal = fbson::FbsonDocument::createValue( - parser.getWriter().getOutput()->getBuffer(), - static_cast(parser.getWriter().getOutput()->getSize())); - - if (fbsonVal == nullptr) { - return nullptr; - } - - return new JSONDocument(fbsonVal, true); -} - -void JSONDocument::Serialize(std::string* dst) const { - // first byte is reserved for header - // currently, header is only version number. that will help us provide - // backwards compatility. we might also store more information here if - // necessary - dst->push_back(kSerializationFormatVersion); - dst->push_back(FBSON_VER); - dst->append(reinterpret_cast(value_), value_->numPackedBytes()); -} - -const char JSONDocument::kSerializationFormatVersion = 2; - -JSONDocument* JSONDocument::Deserialize(const Slice& src) { - Slice input(src); - if (src.size() == 0) { - return nullptr; - } - char header = input[0]; - if (header == 1) { - assert(false); - } - input.remove_prefix(1); - auto value = fbson::FbsonDocument::createValue(input.data(), - static_cast(input.size())); - if (value == nullptr) { - return nullptr; - } - - return new JSONDocument(value, true); -} - -class JSONDocument::const_item_iterator::Impl { - public: - typedef fbson::ObjectVal::const_iterator It; - - explicit Impl(It it) : it_(it) {} - - const char* getKeyStr() const { - return it_->getKeyStr(); - } - - uint8_t klen() const { - return it_->klen(); - } - - It& operator++() { - return ++it_; - } - - bool operator!=(const Impl& other) { - return it_ != other.it_; - } - - fbson::FbsonValue* value() const { - return it_->value(); - } - - private: - It it_; -}; - -JSONDocument::const_item_iterator::const_item_iterator(Impl* impl) -: it_(impl) {} - -JSONDocument::const_item_iterator::const_item_iterator(const_item_iterator&& a) -: it_(std::move(a.it_)) {} - -JSONDocument::const_item_iterator& - JSONDocument::const_item_iterator::operator++() { - ++(*it_); - return *this; -} - -bool JSONDocument::const_item_iterator::operator!=( - const const_item_iterator& other) { - return *it_ != *(other.it_); -} - -JSONDocument::const_item_iterator::~const_item_iterator() { -} - -JSONDocument::const_item_iterator::value_type - JSONDocument::const_item_iterator::operator*() { - return JSONDocument::const_item_iterator::value_type(std::string(it_->getKeyStr(), it_->klen()), - JSONDocument(it_->value(), false)); -} - -JSONDocument::ItemsIteratorGenerator::ItemsIteratorGenerator( - const fbson::ObjectVal& object) - : object_(object) {} - -JSONDocument::const_item_iterator - JSONDocument::ItemsIteratorGenerator::begin() const { - return const_item_iterator(new const_item_iterator::Impl(object_.begin())); -} - -JSONDocument::const_item_iterator - JSONDocument::ItemsIteratorGenerator::end() const { - return const_item_iterator(new const_item_iterator::Impl(object_.end())); -} - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/document/json_document_builder.cc b/utilities/document/json_document_builder.cc deleted file mode 100644 index 7aa95e465..000000000 --- a/utilities/document/json_document_builder.cc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE -#include -#include -#include -#include "rocksdb/utilities/json_document.h" -#include "third-party/fbson/FbsonWriter.h" - -namespace rocksdb { -JSONDocumentBuilder::JSONDocumentBuilder() -: writer_(new fbson::FbsonWriter()) { -} - -JSONDocumentBuilder::JSONDocumentBuilder(fbson::FbsonOutStream* out) -: writer_(new fbson::FbsonWriter(*out)) { -} - -void JSONDocumentBuilder::Reset() { - writer_->reset(); -} - -bool JSONDocumentBuilder::WriteStartArray() { - return writer_->writeStartArray(); -} - -bool JSONDocumentBuilder::WriteEndArray() { - return writer_->writeEndArray(); -} - -bool JSONDocumentBuilder::WriteStartObject() { - return writer_->writeStartObject(); -} - -bool JSONDocumentBuilder::WriteEndObject() { - return writer_->writeEndObject(); -} - -bool JSONDocumentBuilder::WriteKeyValue(const std::string& key, - const JSONDocument& value) { - assert(key.size() <= std::numeric_limits::max()); - size_t bytesWritten = writer_->writeKey(key.c_str(), - static_cast(key.size())); - if (bytesWritten == 0) { - return false; - } - return WriteJSONDocument(value); -} - -bool JSONDocumentBuilder::WriteJSONDocument(const JSONDocument& value) { - switch (value.type()) { - case JSONDocument::kNull: - return writer_->writeNull() != 0; - case JSONDocument::kInt64: - return writer_->writeInt64(value.GetInt64()); - case JSONDocument::kDouble: - return writer_->writeDouble(value.GetDouble()); - case JSONDocument::kBool: - return writer_->writeBool(value.GetBool()); - case JSONDocument::kString: - { - bool res = writer_->writeStartString(); - if (!res) { - return false; - } - const std::string& str = value.GetString(); - res = writer_->writeString(str.c_str(), - static_cast(str.size())); - if (!res) { - return false; - } - return writer_->writeEndString(); - } - case JSONDocument::kArray: - { - bool res = WriteStartArray(); - if (!res) { - return false; - } - for (size_t i = 0; i < value.Count(); ++i) { - res = WriteJSONDocument(value[i]); - if (!res) { - return false; - } - } - return WriteEndArray(); - } - case JSONDocument::kObject: - { - bool res = WriteStartObject(); - if (!res) { - return false; - } - for (auto keyValue : value.Items()) { - WriteKeyValue(keyValue.first, keyValue.second); - } - return WriteEndObject(); - } - default: - assert(false); - } - return false; -} - -JSONDocument JSONDocumentBuilder::GetJSONDocument() { - fbson::FbsonValue* value = - fbson::FbsonDocument::createValue(writer_->getOutput()->getBuffer(), - static_cast(writer_->getOutput()->getSize())); - return JSONDocument(value, true); -} - -JSONDocumentBuilder::~JSONDocumentBuilder() { -} - -} // namespace rocksdb - -#endif // ROCKSDB_LITE diff --git a/utilities/document/json_document_test.cc b/utilities/document/json_document_test.cc deleted file mode 100644 index 9d79c41cf..000000000 --- a/utilities/document/json_document_test.cc +++ /dev/null @@ -1,343 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE - -#include -#include -#include - -#include "rocksdb/utilities/json_document.h" - -#include "util/testutil.h" -#include "util/testharness.h" - -namespace rocksdb { -namespace { -void AssertField(const JSONDocument& json, const std::string& field) { - ASSERT_TRUE(json.Contains(field)); - ASSERT_TRUE(json[field].IsNull()); -} - -void AssertField(const JSONDocument& json, const std::string& field, - const std::string& expected) { - ASSERT_TRUE(json.Contains(field)); - ASSERT_TRUE(json[field].IsString()); - ASSERT_EQ(expected, json[field].GetString()); -} - -void AssertField(const JSONDocument& json, const std::string& field, - int64_t expected) { - ASSERT_TRUE(json.Contains(field)); - ASSERT_TRUE(json[field].IsInt64()); - ASSERT_EQ(expected, json[field].GetInt64()); -} - -void AssertField(const JSONDocument& json, const std::string& field, - bool expected) { - ASSERT_TRUE(json.Contains(field)); - ASSERT_TRUE(json[field].IsBool()); - ASSERT_EQ(expected, json[field].GetBool()); -} - -void AssertField(const JSONDocument& json, const std::string& field, - double expected) { - ASSERT_TRUE(json.Contains(field)); - ASSERT_TRUE(json[field].IsDouble()); - ASSERT_DOUBLE_EQ(expected, json[field].GetDouble()); -} -} // namespace - -class JSONDocumentTest : public testing::Test { - public: - JSONDocumentTest() - : rnd_(101) - {} - - void AssertSampleJSON(const JSONDocument& json) { - AssertField(json, "title", std::string("json")); - AssertField(json, "type", std::string("object")); - // properties - ASSERT_TRUE(json.Contains("properties")); - ASSERT_TRUE(json["properties"].Contains("flags")); - ASSERT_TRUE(json["properties"]["flags"].IsArray()); - ASSERT_EQ(3u, json["properties"]["flags"].Count()); - ASSERT_TRUE(json["properties"]["flags"][0].IsInt64()); - ASSERT_EQ(10, json["properties"]["flags"][0].GetInt64()); - ASSERT_TRUE(json["properties"]["flags"][1].IsString()); - ASSERT_EQ("parse", json["properties"]["flags"][1].GetString()); - ASSERT_TRUE(json["properties"]["flags"][2].IsObject()); - AssertField(json["properties"]["flags"][2], "tag", std::string("no")); - AssertField(json["properties"]["flags"][2], std::string("status")); - AssertField(json["properties"], "age", 110.5e-4); - AssertField(json["properties"], "depth", static_cast(-10)); - // test iteration - std::set expected({"flags", "age", "depth"}); - for (auto item : json["properties"].Items()) { - auto iter = expected.find(item.first); - ASSERT_TRUE(iter != expected.end()); - expected.erase(iter); - } - ASSERT_EQ(0U, expected.size()); - ASSERT_TRUE(json.Contains("latlong")); - ASSERT_TRUE(json["latlong"].IsArray()); - ASSERT_EQ(2u, json["latlong"].Count()); - ASSERT_TRUE(json["latlong"][0].IsDouble()); - ASSERT_EQ(53.25, json["latlong"][0].GetDouble()); - ASSERT_TRUE(json["latlong"][1].IsDouble()); - ASSERT_EQ(43.75, json["latlong"][1].GetDouble()); - AssertField(json, "enabled", true); - } - - const std::string kSampleJSON = - "{ \"title\" : \"json\", \"type\" : \"object\", \"properties\" : { " - "\"flags\": [10, \"parse\", {\"tag\": \"no\", \"status\": null}], " - "\"age\": 110.5e-4, \"depth\": -10 }, \"latlong\": [53.25, 43.75], " - "\"enabled\": true }"; - - const std::string kSampleJSONDifferent = - "{ \"title\" : \"json\", \"type\" : \"object\", \"properties\" : { " - "\"flags\": [10, \"parse\", {\"tag\": \"no\", \"status\": 2}], " - "\"age\": 110.5e-4, \"depth\": -10 }, \"latlong\": [53.25, 43.75], " - "\"enabled\": true }"; - - Random rnd_; -}; - -TEST_F(JSONDocumentTest, MakeNullTest) { - JSONDocument x; - ASSERT_TRUE(x.IsNull()); - ASSERT_TRUE(x.IsOwner()); - ASSERT_TRUE(!x.IsBool()); -} - -TEST_F(JSONDocumentTest, MakeBoolTest) { - { - JSONDocument x(true); - ASSERT_TRUE(x.IsOwner()); - ASSERT_TRUE(x.IsBool()); - ASSERT_TRUE(!x.IsInt64()); - ASSERT_EQ(x.GetBool(), true); - } - - { - JSONDocument x(false); - ASSERT_TRUE(x.IsOwner()); - ASSERT_TRUE(x.IsBool()); - ASSERT_TRUE(!x.IsInt64()); - ASSERT_EQ(x.GetBool(), false); - } -} - -TEST_F(JSONDocumentTest, MakeInt64Test) { - JSONDocument x(static_cast(16)); - ASSERT_TRUE(x.IsInt64()); - ASSERT_TRUE(x.IsInt64()); - ASSERT_TRUE(!x.IsBool()); - ASSERT_TRUE(x.IsOwner()); - ASSERT_EQ(x.GetInt64(), 16); -} - -TEST_F(JSONDocumentTest, MakeStringTest) { - JSONDocument x("string"); - ASSERT_TRUE(x.IsOwner()); - ASSERT_TRUE(x.IsString()); - ASSERT_TRUE(!x.IsBool()); - ASSERT_EQ(x.GetString(), "string"); -} - -TEST_F(JSONDocumentTest, MakeDoubleTest) { - JSONDocument x(5.6); - ASSERT_TRUE(x.IsOwner()); - ASSERT_TRUE(x.IsDouble()); - ASSERT_TRUE(!x.IsBool()); - ASSERT_EQ(x.GetDouble(), 5.6); -} - -TEST_F(JSONDocumentTest, MakeByTypeTest) { - { - JSONDocument x(JSONDocument::kNull); - ASSERT_TRUE(x.IsNull()); - } - { - JSONDocument x(JSONDocument::kBool); - ASSERT_TRUE(x.IsBool()); - } - { - JSONDocument x(JSONDocument::kString); - ASSERT_TRUE(x.IsString()); - } - { - JSONDocument x(JSONDocument::kInt64); - ASSERT_TRUE(x.IsInt64()); - } - { - JSONDocument x(JSONDocument::kDouble); - ASSERT_TRUE(x.IsDouble()); - } - { - JSONDocument x(JSONDocument::kObject); - ASSERT_TRUE(x.IsObject()); - } - { - JSONDocument x(JSONDocument::kArray); - ASSERT_TRUE(x.IsArray()); - } -} - -TEST_F(JSONDocumentTest, Parsing) { - std::unique_ptr parsed_json( - JSONDocument::ParseJSON(kSampleJSON.c_str())); - ASSERT_TRUE(parsed_json->IsOwner()); - ASSERT_TRUE(parsed_json != nullptr); - AssertSampleJSON(*parsed_json); - - // test deep copying - JSONDocument copied_json_document(*parsed_json); - AssertSampleJSON(copied_json_document); - ASSERT_TRUE(copied_json_document == *parsed_json); - - std::unique_ptr parsed_different_sample( - JSONDocument::ParseJSON(kSampleJSONDifferent.c_str())); - ASSERT_TRUE(parsed_different_sample != nullptr); - ASSERT_TRUE(!(*parsed_different_sample == copied_json_document)); - - // parse error - const std::string kFaultyJSON = - kSampleJSON.substr(0, kSampleJSON.size() - 10); - ASSERT_TRUE(JSONDocument::ParseJSON(kFaultyJSON.c_str()) == nullptr); -} - -TEST_F(JSONDocumentTest, Serialization) { - std::unique_ptr parsed_json( - JSONDocument::ParseJSON(kSampleJSON.c_str())); - ASSERT_TRUE(parsed_json != nullptr); - ASSERT_TRUE(parsed_json->IsOwner()); - std::string serialized; - parsed_json->Serialize(&serialized); - - std::unique_ptr deserialized_json( - JSONDocument::Deserialize(Slice(serialized))); - ASSERT_TRUE(deserialized_json != nullptr); - AssertSampleJSON(*deserialized_json); - - // deserialization failure - ASSERT_TRUE(JSONDocument::Deserialize( - Slice(serialized.data(), serialized.size() - 10)) == nullptr); -} - -TEST_F(JSONDocumentTest, OperatorEqualsTest) { - // kNull - ASSERT_TRUE(JSONDocument() == JSONDocument()); - - // kBool - ASSERT_TRUE(JSONDocument(false) != JSONDocument()); - ASSERT_TRUE(JSONDocument(false) == JSONDocument(false)); - ASSERT_TRUE(JSONDocument(true) == JSONDocument(true)); - ASSERT_TRUE(JSONDocument(false) != JSONDocument(true)); - - // kString - ASSERT_TRUE(JSONDocument("test") != JSONDocument()); - ASSERT_TRUE(JSONDocument("test") == JSONDocument("test")); - - // kInt64 - ASSERT_TRUE(JSONDocument(static_cast(15)) != JSONDocument()); - ASSERT_TRUE(JSONDocument(static_cast(15)) != - JSONDocument(static_cast(14))); - ASSERT_TRUE(JSONDocument(static_cast(15)) == - JSONDocument(static_cast(15))); - - std::unique_ptr arrayWithInt8Doc( - JSONDocument::ParseJSON("[8]")); - ASSERT_TRUE(arrayWithInt8Doc != nullptr); - ASSERT_TRUE(arrayWithInt8Doc->IsArray()); - ASSERT_TRUE((*arrayWithInt8Doc)[0].IsInt64()); - ASSERT_TRUE((*arrayWithInt8Doc)[0] == JSONDocument(static_cast(8))); - - std::unique_ptr arrayWithInt16Doc( - JSONDocument::ParseJSON("[512]")); - ASSERT_TRUE(arrayWithInt16Doc != nullptr); - ASSERT_TRUE(arrayWithInt16Doc->IsArray()); - ASSERT_TRUE((*arrayWithInt16Doc)[0].IsInt64()); - ASSERT_TRUE((*arrayWithInt16Doc)[0] == - JSONDocument(static_cast(512))); - - std::unique_ptr arrayWithInt32Doc( - JSONDocument::ParseJSON("[1000000]")); - ASSERT_TRUE(arrayWithInt32Doc != nullptr); - ASSERT_TRUE(arrayWithInt32Doc->IsArray()); - ASSERT_TRUE((*arrayWithInt32Doc)[0].IsInt64()); - ASSERT_TRUE((*arrayWithInt32Doc)[0] == - JSONDocument(static_cast(1000000))); - - // kDouble - ASSERT_TRUE(JSONDocument(15.) != JSONDocument()); - ASSERT_TRUE(JSONDocument(15.) != JSONDocument(14.)); - ASSERT_TRUE(JSONDocument(15.) == JSONDocument(15.)); -} - -TEST_F(JSONDocumentTest, JSONDocumentBuilderTest) { - std::unique_ptr parsedArray( - JSONDocument::ParseJSON("[1, [123, \"a\", \"b\"], {\"b\":\"c\"}]")); - ASSERT_TRUE(parsedArray != nullptr); - - JSONDocumentBuilder builder; - ASSERT_TRUE(builder.WriteStartArray()); - ASSERT_TRUE(builder.WriteJSONDocument(1)); - - ASSERT_TRUE(builder.WriteStartArray()); - ASSERT_TRUE(builder.WriteJSONDocument(123)); - ASSERT_TRUE(builder.WriteJSONDocument("a")); - ASSERT_TRUE(builder.WriteJSONDocument("b")); - ASSERT_TRUE(builder.WriteEndArray()); - - ASSERT_TRUE(builder.WriteStartObject()); - ASSERT_TRUE(builder.WriteKeyValue("b", "c")); - ASSERT_TRUE(builder.WriteEndObject()); - - ASSERT_TRUE(builder.WriteEndArray()); - - ASSERT_TRUE(*parsedArray == builder.GetJSONDocument()); -} - -TEST_F(JSONDocumentTest, OwnershipTest) { - std::unique_ptr parsed( - JSONDocument::ParseJSON(kSampleJSON.c_str())); - ASSERT_TRUE(parsed != nullptr); - ASSERT_TRUE(parsed->IsOwner()); - - // Copy constructor from owner -> owner - JSONDocument copy_constructor(*parsed); - ASSERT_TRUE(copy_constructor.IsOwner()); - - // Copy constructor from non-owner -> non-owner - JSONDocument non_owner((*parsed)["properties"]); - ASSERT_TRUE(!non_owner.IsOwner()); - - // Move constructor from owner -> owner - JSONDocument moved_from_owner(std::move(copy_constructor)); - ASSERT_TRUE(moved_from_owner.IsOwner()); - - // Move constructor from non-owner -> non-owner - JSONDocument moved_from_non_owner(std::move(non_owner)); - ASSERT_TRUE(!moved_from_non_owner.IsOwner()); -} - -} // namespace rocksdb - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -#else -#include - -int main(int /*argc*/, char** /*argv*/) { - fprintf(stderr, "SKIPPED as JSONDocument is not supported in ROCKSDB_LITE\n"); - return 0; -} - -#endif // !ROCKSDB_LITE diff --git a/utilities/geodb/geodb_impl.cc b/utilities/geodb/geodb_impl.cc deleted file mode 100644 index 9150b16b2..000000000 --- a/utilities/geodb/geodb_impl.cc +++ /dev/null @@ -1,478 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#ifndef ROCKSDB_LITE - -#include "utilities/geodb/geodb_impl.h" - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif - -#include -#include -#include -#include -#include "util/coding.h" -#include "util/filename.h" -#include "util/string_util.h" - -// -// There are two types of keys. The first type of key-values -// maps a geo location to the set of object ids and their values. -// Table 1 -// key : p + : + $quadkey + : + $id + -// : + $latitude + : + $longitude -// value : value of the object -// This table can be used to find all objects that reside near -// a specified geolocation. -// -// Table 2 -// key : 'k' + : + $id -// value: $quadkey - -namespace rocksdb { - -const double GeoDBImpl::PI = 3.141592653589793; -const double GeoDBImpl::EarthRadius = 6378137; -const double GeoDBImpl::MinLatitude = -85.05112878; -const double GeoDBImpl::MaxLatitude = 85.05112878; -const double GeoDBImpl::MinLongitude = -180; -const double GeoDBImpl::MaxLongitude = 180; - -GeoDBImpl::GeoDBImpl(DB* db, const GeoDBOptions& options) : - GeoDB(db, options), db_(db), options_(options) { -} - -GeoDBImpl::~GeoDBImpl() { -} - -Status GeoDBImpl::Insert(const GeoObject& obj) { - WriteBatch batch; - - // It is possible that this id is already associated with - // with a different position. We first have to remove that - // association before we can insert the new one. - - // remove existing object, if it exists - GeoObject old; - Status status = GetById(obj.id, &old); - if (status.ok()) { - assert(obj.id.compare(old.id) == 0); - std::string quadkey = PositionToQuad(old.position, Detail); - std::string key1 = MakeKey1(old.position, old.id, quadkey); - std::string key2 = MakeKey2(old.id); - batch.Delete(Slice(key1)); - batch.Delete(Slice(key2)); - } else if (status.IsNotFound()) { - // What if another thread is trying to insert the same ID concurrently? - } else { - return status; - } - - // insert new object - std::string quadkey = PositionToQuad(obj.position, Detail); - std::string key1 = MakeKey1(obj.position, obj.id, quadkey); - std::string key2 = MakeKey2(obj.id); - batch.Put(Slice(key1), Slice(obj.value)); - batch.Put(Slice(key2), Slice(quadkey)); - return db_->Write(woptions_, &batch); -} - -Status GeoDBImpl::GetByPosition(const GeoPosition& pos, - const Slice& id, - std::string* value) { - std::string quadkey = PositionToQuad(pos, Detail); - std::string key1 = MakeKey1(pos, id, quadkey); - return db_->Get(roptions_, Slice(key1), value); -} - -Status GeoDBImpl::GetById(const Slice& id, GeoObject* object) { - Status status; - std::string quadkey; - - // create an iterator so that we can get a consistent picture - // of the database. - Iterator* iter = db_->NewIterator(roptions_); - - // create key for table2 - std::string kt = MakeKey2(id); - Slice key2(kt); - - iter->Seek(key2); - if (iter->Valid() && iter->status().ok()) { - if (iter->key().compare(key2) == 0) { - quadkey = iter->value().ToString(); - } - } - if (quadkey.size() == 0) { - delete iter; - return Status::NotFound(key2); - } - - // - // Seek to the quadkey + id prefix - // - std::string prefix = MakeKey1Prefix(quadkey, id); - iter->Seek(Slice(prefix)); - assert(iter->Valid()); - if (!iter->Valid() || !iter->status().ok()) { - delete iter; - return Status::NotFound(); - } - - // split the key into p + quadkey + id + lat + lon - Slice key = iter->key(); - std::vector parts = StringSplit(key.ToString(), ':'); - assert(parts.size() == 5); - assert(parts[0] == "p"); - assert(parts[1] == quadkey); - assert(parts[2] == id); - - // fill up output parameters - object->position.latitude = atof(parts[3].c_str()); - object->position.longitude = atof(parts[4].c_str()); - object->id = id.ToString(); // this is redundant - object->value = iter->value().ToString(); - delete iter; - return Status::OK(); -} - - -Status GeoDBImpl::Remove(const Slice& id) { - // Read the object from the database - GeoObject obj; - Status status = GetById(id, &obj); - if (!status.ok()) { - return status; - } - - // remove the object by atomically deleting it from both tables - std::string quadkey = PositionToQuad(obj.position, Detail); - std::string key1 = MakeKey1(obj.position, obj.id, quadkey); - std::string key2 = MakeKey2(obj.id); - WriteBatch batch; - batch.Delete(Slice(key1)); - batch.Delete(Slice(key2)); - return db_->Write(woptions_, &batch); -} - -class GeoIteratorImpl : public GeoIterator { - private: - std::vector values_; - std::vector::iterator iter_; - public: - explicit GeoIteratorImpl(std::vector values) - : values_(std::move(values)) { - iter_ = values_.begin(); - } - virtual void Next() override; - virtual bool Valid() const override; - virtual const GeoObject& geo_object() override; - virtual Status status() const override; -}; - -class GeoErrorIterator : public GeoIterator { - private: - Status status_; - public: - explicit GeoErrorIterator(Status s) : status_(s) {} - virtual void Next() override {}; - virtual bool Valid() const override { return false; } - virtual const GeoObject& geo_object() override { - GeoObject* g = new GeoObject(); - return *g; - } - virtual Status status() const override { return status_; } -}; - -void GeoIteratorImpl::Next() { - assert(Valid()); - iter_++; -} - -bool GeoIteratorImpl::Valid() const { - return iter_ != values_.end(); -} - -const GeoObject& GeoIteratorImpl::geo_object() { - assert(Valid()); - return *iter_; -} - -Status GeoIteratorImpl::status() const { - return Status::OK(); -} - -GeoIterator* GeoDBImpl::SearchRadial(const GeoPosition& pos, - double radius, - int number_of_values) { - std::vector values; - - // Gather all bounding quadkeys - std::vector qids; - Status s = searchQuadIds(pos, radius, &qids); - if (!s.ok()) { - return new GeoErrorIterator(s); - } - - // create an iterator - Iterator* iter = db_->NewIterator(ReadOptions()); - - // Process each prospective quadkey - for (const std::string& qid : qids) { - // The user is interested in only these many objects. - if (number_of_values == 0) { - break; - } - - // convert quadkey to db key prefix - std::string dbkey = MakeQuadKeyPrefix(qid); - - for (iter->Seek(dbkey); - number_of_values > 0 && iter->Valid() && iter->status().ok(); - iter->Next()) { - // split the key into p + quadkey + id + lat + lon - Slice key = iter->key(); - std::vector parts = StringSplit(key.ToString(), ':'); - assert(parts.size() == 5); - assert(parts[0] == "p"); - std::string* quadkey = &parts[1]; - - // If the key we are looking for is a prefix of the key - // we found from the database, then this is one of the keys - // we are looking for. - auto res = std::mismatch(qid.begin(), qid.end(), quadkey->begin()); - if (res.first == qid.end()) { - GeoPosition obj_pos(atof(parts[3].c_str()), atof(parts[4].c_str())); - GeoObject obj(obj_pos, parts[2], iter->value().ToString()); - values.push_back(obj); - number_of_values--; - } else { - break; - } - } - } - delete iter; - return new GeoIteratorImpl(std::move(values)); -} - -std::string GeoDBImpl::MakeKey1(const GeoPosition& pos, Slice id, - std::string quadkey) { - std::string lat = rocksdb::ToString(pos.latitude); - std::string lon = rocksdb::ToString(pos.longitude); - std::string key = "p:"; - key.reserve(5 + quadkey.size() + id.size() + lat.size() + lon.size()); - key.append(quadkey); - key.append(":"); - key.append(id.ToString()); - key.append(":"); - key.append(lat); - key.append(":"); - key.append(lon); - return key; -} - -std::string GeoDBImpl::MakeKey2(Slice id) { - std::string key = "k:"; - key.append(id.ToString()); - return key; -} - -std::string GeoDBImpl::MakeKey1Prefix(std::string quadkey, - Slice id) { - std::string key = "p:"; - key.reserve(4 + quadkey.size() + id.size()); - key.append(quadkey); - key.append(":"); - key.append(id.ToString()); - key.append(":"); - return key; -} - -std::string GeoDBImpl::MakeQuadKeyPrefix(std::string quadkey) { - std::string key = "p:"; - key.append(quadkey); - return key; -} - -// convert degrees to radians -double GeoDBImpl::radians(double x) { - return (x * PI) / 180; -} - -// convert radians to degrees -double GeoDBImpl::degrees(double x) { - return (x * 180) / PI; -} - -// convert a gps location to quad coordinate -std::string GeoDBImpl::PositionToQuad(const GeoPosition& pos, - int levelOfDetail) { - Pixel p = PositionToPixel(pos, levelOfDetail); - Tile tile = PixelToTile(p); - return TileToQuadKey(tile, levelOfDetail); -} - -GeoPosition GeoDBImpl::displaceLatLon(double lat, double lon, - double deltay, double deltax) { - double dLat = deltay / EarthRadius; - double dLon = deltax / (EarthRadius * cos(radians(lat))); - return GeoPosition(lat + degrees(dLat), - lon + degrees(dLon)); -} - -// -// Return the distance between two positions on the earth -// -double GeoDBImpl::distance(double lat1, double lon1, - double lat2, double lon2) { - double lon = radians(lon2 - lon1); - double lat = radians(lat2 - lat1); - - double a = (sin(lat / 2) * sin(lat / 2)) + - cos(radians(lat1)) * cos(radians(lat2)) * - (sin(lon / 2) * sin(lon / 2)); - double angle = 2 * atan2(sqrt(a), sqrt(1 - a)); - return angle * EarthRadius; -} - -// -// Returns all the quadkeys inside the search range -// -Status GeoDBImpl::searchQuadIds(const GeoPosition& position, - double radius, - std::vector* quadKeys) { - // get the outline of the search square - GeoPosition topLeftPos = boundingTopLeft(position, radius); - GeoPosition bottomRightPos = boundingBottomRight(position, radius); - - Pixel topLeft = PositionToPixel(topLeftPos, Detail); - Pixel bottomRight = PositionToPixel(bottomRightPos, Detail); - - // how many level of details to look for - int numberOfTilesAtMaxDepth = static_cast(std::floor((bottomRight.x - topLeft.x) / 256)); - int zoomLevelsToRise = static_cast(std::floor(std::log(numberOfTilesAtMaxDepth) / std::log(2))); - zoomLevelsToRise++; - int levels = std::max(0, Detail - zoomLevelsToRise); - - quadKeys->push_back(PositionToQuad(GeoPosition(topLeftPos.latitude, - topLeftPos.longitude), - levels)); - quadKeys->push_back(PositionToQuad(GeoPosition(topLeftPos.latitude, - bottomRightPos.longitude), - levels)); - quadKeys->push_back(PositionToQuad(GeoPosition(bottomRightPos.latitude, - topLeftPos.longitude), - levels)); - quadKeys->push_back(PositionToQuad(GeoPosition(bottomRightPos.latitude, - bottomRightPos.longitude), - levels)); - return Status::OK(); -} - -// Determines the ground resolution (in meters per pixel) at a specified -// latitude and level of detail. -// Latitude (in degrees) at which to measure the ground resolution. -// Level of detail, from 1 (lowest detail) to 23 (highest detail). -// Returns the ground resolution, in meters per pixel. -double GeoDBImpl::GroundResolution(double latitude, int levelOfDetail) { - latitude = clip(latitude, MinLatitude, MaxLatitude); - return cos(latitude * PI / 180) * 2 * PI * EarthRadius / - MapSize(levelOfDetail); -} - -// Converts a point from latitude/longitude WGS-84 coordinates (in degrees) -// into pixel XY coordinates at a specified level of detail. -GeoDBImpl::Pixel GeoDBImpl::PositionToPixel(const GeoPosition& pos, - int levelOfDetail) { - double latitude = clip(pos.latitude, MinLatitude, MaxLatitude); - double x = (pos.longitude + 180) / 360; - double sinLatitude = sin(latitude * PI / 180); - double y = 0.5 - std::log((1 + sinLatitude) / (1 - sinLatitude)) / (4 * PI); - double mapSize = MapSize(levelOfDetail); - double X = std::floor(clip(x * mapSize + 0.5, 0, mapSize - 1)); - double Y = std::floor(clip(y * mapSize + 0.5, 0, mapSize - 1)); - return Pixel((unsigned int)X, (unsigned int)Y); -} - -GeoPosition GeoDBImpl::PixelToPosition(const Pixel& pixel, int levelOfDetail) { - double mapSize = MapSize(levelOfDetail); - double x = (clip(pixel.x, 0, mapSize - 1) / mapSize) - 0.5; - double y = 0.5 - (clip(pixel.y, 0, mapSize - 1) / mapSize); - double latitude = 90 - 360 * atan(exp(-y * 2 * PI)) / PI; - double longitude = 360 * x; - return GeoPosition(latitude, longitude); -} - -// Converts a Pixel to a Tile -GeoDBImpl::Tile GeoDBImpl::PixelToTile(const Pixel& pixel) { - unsigned int tileX = static_cast(std::floor(pixel.x / 256)); - unsigned int tileY = static_cast(std::floor(pixel.y / 256)); - return Tile(tileX, tileY); -} - -GeoDBImpl::Pixel GeoDBImpl::TileToPixel(const Tile& tile) { - unsigned int pixelX = tile.x * 256; - unsigned int pixelY = tile.y * 256; - return Pixel(pixelX, pixelY); -} - -// Convert a Tile to a quadkey -std::string GeoDBImpl::TileToQuadKey(const Tile& tile, int levelOfDetail) { - std::stringstream quadKey; - for (int i = levelOfDetail; i > 0; i--) { - char digit = '0'; - int mask = 1 << (i - 1); - if ((tile.x & mask) != 0) { - digit++; - } - if ((tile.y & mask) != 0) { - digit++; - digit++; - } - quadKey << digit; - } - return quadKey.str(); -} - -// -// Convert a quadkey to a tile and its level of detail -// -void GeoDBImpl::QuadKeyToTile(std::string quadkey, Tile* tile, - int* levelOfDetail) { - tile->x = tile->y = 0; - *levelOfDetail = static_cast(quadkey.size()); - const char* key = reinterpret_cast(quadkey.c_str()); - for (int i = *levelOfDetail; i > 0; i--) { - int mask = 1 << (i - 1); - switch (key[*levelOfDetail - i]) { - case '0': - break; - - case '1': - tile->x |= mask; - break; - - case '2': - tile->y |= mask; - break; - - case '3': - tile->x |= mask; - tile->y |= mask; - break; - - default: - std::stringstream msg; - msg << quadkey; - msg << " Invalid QuadKey."; - throw std::runtime_error(msg.str()); - } - } -} -} // namespace rocksdb - -#endif // ROCKSDB_LITE diff --git a/utilities/geodb/geodb_impl.h b/utilities/geodb/geodb_impl.h deleted file mode 100644 index 6b15f5422..000000000 --- a/utilities/geodb/geodb_impl.h +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// - -#ifndef ROCKSDB_LITE - -#pragma once -#include -#include -#include -#include -#include -#include - -#include "rocksdb/utilities/geo_db.h" -#include "rocksdb/utilities/stackable_db.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" - -namespace rocksdb { - -// A specific implementation of GeoDB - -class GeoDBImpl : public GeoDB { - public: - GeoDBImpl(DB* db, const GeoDBOptions& options); - ~GeoDBImpl(); - - // Associate the GPS location with the identified by 'id'. The value - // is a blob that is associated with this object. - virtual Status Insert(const GeoObject& object) override; - - // Retrieve the value of the object located at the specified GPS - // location and is identified by the 'id'. - virtual Status GetByPosition(const GeoPosition& pos, const Slice& id, - std::string* value) override; - - // Retrieve the value of the object identified by the 'id'. This method - // could be potentially slower than GetByPosition - virtual Status GetById(const Slice& id, GeoObject* object) override; - - // Delete the specified object - virtual Status Remove(const Slice& id) override; - - // Returns a list of all items within a circular radius from the - // specified gps location - virtual GeoIterator* SearchRadial(const GeoPosition& pos, double radius, - int number_of_values) override; - - private: - DB* db_; - const GeoDBOptions options_; - const WriteOptions woptions_; - const ReadOptions roptions_; - - // MSVC requires the definition for this static const to be in .CC file - // The value of PI - static const double PI; - - // convert degrees to radians - static double radians(double x); - - // convert radians to degrees - static double degrees(double x); - - // A pixel class that captures X and Y coordinates - class Pixel { - public: - unsigned int x; - unsigned int y; - Pixel(unsigned int a, unsigned int b) : - x(a), y(b) { - } - }; - - // A Tile in the geoid - class Tile { - public: - unsigned int x; - unsigned int y; - Tile(unsigned int a, unsigned int b) : - x(a), y(b) { - } - }; - - // convert a gps location to quad coordinate - static std::string PositionToQuad(const GeoPosition& pos, int levelOfDetail); - - // arbitrary constant use for WGS84 via - // http://en.wikipedia.org/wiki/World_Geodetic_System - // http://mathforum.org/library/drmath/view/51832.html - // http://msdn.microsoft.com/en-us/library/bb259689.aspx - // http://www.tuicool.com/articles/NBrE73 - // - const int Detail = 23; - // MSVC requires the definition for this static const to be in .CC file - static const double EarthRadius; - static const double MinLatitude; - static const double MaxLatitude; - static const double MinLongitude; - static const double MaxLongitude; - - // clips a number to the specified minimum and maximum values. - static double clip(double n, double minValue, double maxValue) { - return fmin(fmax(n, minValue), maxValue); - } - - // Determines the map width and height (in pixels) at a specified level - // of detail, from 1 (lowest detail) to 23 (highest detail). - // Returns the map width and height in pixels. - static unsigned int MapSize(int levelOfDetail) { - return (unsigned int)(256 << levelOfDetail); - } - - // Determines the ground resolution (in meters per pixel) at a specified - // latitude and level of detail. - // Latitude (in degrees) at which to measure the ground resolution. - // Level of detail, from 1 (lowest detail) to 23 (highest detail). - // Returns the ground resolution, in meters per pixel. - static double GroundResolution(double latitude, int levelOfDetail); - - // Converts a point from latitude/longitude WGS-84 coordinates (in degrees) - // into pixel XY coordinates at a specified level of detail. - static Pixel PositionToPixel(const GeoPosition& pos, int levelOfDetail); - - static GeoPosition PixelToPosition(const Pixel& pixel, int levelOfDetail); - - // Converts a Pixel to a Tile - static Tile PixelToTile(const Pixel& pixel); - - static Pixel TileToPixel(const Tile& tile); - - // Convert a Tile to a quadkey - static std::string TileToQuadKey(const Tile& tile, int levelOfDetail); - - // Convert a quadkey to a tile and its level of detail - static void QuadKeyToTile(std::string quadkey, Tile* tile, - int *levelOfDetail); - - // Return the distance between two positions on the earth - static double distance(double lat1, double lon1, - double lat2, double lon2); - static GeoPosition displaceLatLon(double lat, double lon, - double deltay, double deltax); - - // - // Returns the top left position after applying the delta to - // the specified position - // - static GeoPosition boundingTopLeft(const GeoPosition& in, double radius) { - return displaceLatLon(in.latitude, in.longitude, -radius, -radius); - } - - // - // Returns the bottom right position after applying the delta to - // the specified position - static GeoPosition boundingBottomRight(const GeoPosition& in, - double radius) { - return displaceLatLon(in.latitude, in.longitude, radius, radius); - } - - // - // Get all quadkeys within a radius of a specified position - // - Status searchQuadIds(const GeoPosition& position, - double radius, - std::vector* quadKeys); - - // - // Create keys for accessing rocksdb table(s) - // - static std::string MakeKey1(const GeoPosition& pos, - Slice id, - std::string quadkey); - static std::string MakeKey2(Slice id); - static std::string MakeKey1Prefix(std::string quadkey, - Slice id); - static std::string MakeQuadKeyPrefix(std::string quadkey); -}; - -} // namespace rocksdb - -#endif // ROCKSDB_LITE diff --git a/utilities/geodb/geodb_test.cc b/utilities/geodb/geodb_test.cc deleted file mode 100644 index 8477c86a3..000000000 --- a/utilities/geodb/geodb_test.cc +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#ifndef ROCKSDB_LITE -#include "utilities/geodb/geodb_impl.h" - -#include -#include "util/testharness.h" - -namespace rocksdb { - -class GeoDBTest : public testing::Test { - public: - static const std::string kDefaultDbName; - static Options options; - DB* db; - GeoDB* geodb; - - GeoDBTest() { - GeoDBOptions geodb_options; - EXPECT_OK(DestroyDB(kDefaultDbName, options)); - options.create_if_missing = true; - Status status = DB::Open(options, kDefaultDbName, &db); - geodb = new GeoDBImpl(db, geodb_options); - } - - ~GeoDBTest() { - delete geodb; - } - - GeoDB* getdb() { - return geodb; - } -}; - -const std::string GeoDBTest::kDefaultDbName = - test::PerThreadDBPath("geodb_test"); -Options GeoDBTest::options = Options(); - -// Insert, Get and Remove -TEST_F(GeoDBTest, SimpleTest) { - GeoPosition pos1(100, 101); - std::string id1("id1"); - std::string value1("value1"); - - // insert first object into database - GeoObject obj1(pos1, id1, value1); - Status status = getdb()->Insert(obj1); - ASSERT_TRUE(status.ok()); - - // insert second object into database - GeoPosition pos2(200, 201); - std::string id2("id2"); - std::string value2 = "value2"; - GeoObject obj2(pos2, id2, value2); - status = getdb()->Insert(obj2); - ASSERT_TRUE(status.ok()); - - // retrieve first object using position - std::string value; - status = getdb()->GetByPosition(pos1, Slice(id1), &value); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(value, value1); - - // retrieve first object using id - GeoObject obj; - status = getdb()->GetById(Slice(id1), &obj); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(obj.position.latitude, 100); - ASSERT_EQ(obj.position.longitude, 101); - ASSERT_EQ(obj.id.compare(id1), 0); - ASSERT_EQ(obj.value, value1); - - // delete first object - status = getdb()->Remove(Slice(id1)); - ASSERT_TRUE(status.ok()); - status = getdb()->GetByPosition(pos1, Slice(id1), &value); - ASSERT_TRUE(status.IsNotFound()); - status = getdb()->GetById(id1, &obj); - ASSERT_TRUE(status.IsNotFound()); - - // check that we can still find second object - status = getdb()->GetByPosition(pos2, id2, &value); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(value, value2); - status = getdb()->GetById(id2, &obj); - ASSERT_TRUE(status.ok()); -} - -// Search. -// Verify distances via http://www.stevemorse.org/nearest/distance.php -TEST_F(GeoDBTest, Search) { - GeoPosition pos1(45, 45); - std::string id1("mid1"); - std::string value1 = "midvalue1"; - - // insert object at 45 degree latitude - GeoObject obj1(pos1, id1, value1); - Status status = getdb()->Insert(obj1); - ASSERT_TRUE(status.ok()); - - // search all objects centered at 46 degree latitude with - // a radius of 200 kilometers. We should find the one object that - // we inserted earlier. - GeoIterator* iter1 = getdb()->SearchRadial(GeoPosition(46, 46), 200000); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(iter1->geo_object().value, "midvalue1"); - uint32_t size = 0; - while (iter1->Valid()) { - GeoObject obj; - status = getdb()->GetById(Slice(id1), &obj); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(iter1->geo_object().position.latitude, pos1.latitude); - ASSERT_EQ(iter1->geo_object().position.longitude, pos1.longitude); - ASSERT_EQ(iter1->geo_object().id.compare(id1), 0); - ASSERT_EQ(iter1->geo_object().value, value1); - - size++; - iter1->Next(); - ASSERT_TRUE(!iter1->Valid()); - } - ASSERT_EQ(size, 1U); - delete iter1; - - // search all objects centered at 46 degree latitude with - // a radius of 2 kilometers. There should be none. - GeoIterator* iter2 = getdb()->SearchRadial(GeoPosition(46, 46), 2); - ASSERT_TRUE(status.ok()); - ASSERT_FALSE(iter2->Valid()); - delete iter2; -} - -TEST_F(GeoDBTest, DifferentPosInSameQuadkey) { - // insert obj1 into database - GeoPosition pos1(40.00001, 116.00001); - std::string id1("12"); - std::string value1("value1"); - - GeoObject obj1(pos1, id1, value1); - Status status = getdb()->Insert(obj1); - ASSERT_TRUE(status.ok()); - - // insert obj2 into database - GeoPosition pos2(40.00002, 116.00002); - std::string id2("123"); - std::string value2 = "value2"; - - GeoObject obj2(pos2, id2, value2); - status = getdb()->Insert(obj2); - ASSERT_TRUE(status.ok()); - - // get obj1's quadkey - ReadOptions opt; - PinnableSlice quadkey1; - status = getdb()->Get(opt, getdb()->DefaultColumnFamily(), "k:" + id1, &quadkey1); - ASSERT_TRUE(status.ok()); - - // get obj2's quadkey - PinnableSlice quadkey2; - status = getdb()->Get(opt, getdb()->DefaultColumnFamily(), "k:" + id2, &quadkey2); - ASSERT_TRUE(status.ok()); - - // obj1 and obj2 have the same quadkey - ASSERT_EQ(quadkey1, quadkey2); - - // get obj1 by id, and check value - GeoObject obj; - status = getdb()->GetById(Slice(id1), &obj); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(obj.position.latitude, pos1.latitude); - ASSERT_EQ(obj.position.longitude, pos1.longitude); - ASSERT_EQ(obj.id.compare(id1), 0); - ASSERT_EQ(obj.value, value1); - - // get obj2 by id, and check value - status = getdb()->GetById(Slice(id2), &obj); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(obj.position.latitude, pos2.latitude); - ASSERT_EQ(obj.position.longitude, pos2.longitude); - ASSERT_EQ(obj.id.compare(id2), 0); - ASSERT_EQ(obj.value, value2); -} - -} // namespace rocksdb - -int main(int argc, char* argv[]) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} -#else - -#include - -int main() { - fprintf(stderr, "SKIPPED\n"); - return 0; -} - -#endif // !ROCKSDB_LITE diff --git a/utilities/redis/README b/utilities/redis/README deleted file mode 100644 index 8b17bc05a..000000000 --- a/utilities/redis/README +++ /dev/null @@ -1,14 +0,0 @@ -This folder defines a REDIS-style interface for Rocksdb. -Right now it is written as a simple tag-on in the rocksdb::RedisLists class. -It implements Redis Lists, and supports only the "non-blocking operations". - -Internally, the set of lists are stored in a rocksdb database, mapping keys to -values. Each "value" is the list itself, storing a sequence of "elements". -Each element is stored as a 32-bit-integer, followed by a sequence of bytes. -The 32-bit-integer represents the length of the element (that is, the number -of bytes that follow). And then that many bytes follow. - - -NOTE: This README file may be old. See the actual redis_lists.cc file for -definitive details on the implementation. There should be a header at the top -of that file, explaining a bit of the implementation details. diff --git a/utilities/redis/redis_list_exception.h b/utilities/redis/redis_list_exception.h deleted file mode 100644 index bc2b39a31..000000000 --- a/utilities/redis/redis_list_exception.h +++ /dev/null @@ -1,22 +0,0 @@ -/** - * A simple structure for exceptions in RedisLists. - * - * @author Deon Nicholas (dnicholas@fb.com) - * Copyright 2013 Facebook - */ - -#pragma once -#ifndef ROCKSDB_LITE -#include - -namespace rocksdb { - -class RedisListException: public std::exception { - public: - const char* what() const throw() override { - return "Invalid operation or corrupt data in Redis List."; - } -}; - -} // namespace rocksdb -#endif diff --git a/utilities/redis/redis_list_iterator.h b/utilities/redis/redis_list_iterator.h deleted file mode 100644 index 7bfe20690..000000000 --- a/utilities/redis/redis_list_iterator.h +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright 2013 Facebook -/** - * RedisListIterator: - * An abstraction over the "list" concept (e.g.: for redis lists). - * Provides functionality to read, traverse, edit, and write these lists. - * - * Upon construction, the RedisListIterator is given a block of list data. - * Internally, it stores a pointer to the data and a pointer to current item. - * It also stores a "result" list that will be mutated over time. - * - * Traversal and mutation are done by "forward iteration". - * The Push() and Skip() methods will advance the iterator to the next item. - * However, Push() will also "write the current item to the result". - * Skip() will simply move to next item, causing current item to be dropped. - * - * Upon completion, the result (accessible by WriteResult()) will be saved. - * All "skipped" items will be gone; all "pushed" items will remain. - * - * @throws Any of the operations may throw a RedisListException if an invalid - * operation is performed or if the data is found to be corrupt. - * - * @notes By default, if WriteResult() is called part-way through iteration, - * it will automatically advance the iterator to the end, and Keep() - * all items that haven't been traversed yet. This may be subject - * to review. - * - * @notes Can access the "current" item via GetCurrent(), and other - * list-specific information such as Length(). - * - * @notes The internal representation is due to change at any time. Presently, - * the list is represented as follows: - * - 32-bit integer header: the number of items in the list - * - For each item: - * - 32-bit int (n): the number of bytes representing this item - * - n bytes of data: the actual data. - * - * @author Deon Nicholas (dnicholas@fb.com) - */ - -#pragma once -#ifndef ROCKSDB_LITE - -#include - -#include "redis_list_exception.h" -#include "rocksdb/slice.h" -#include "util/coding.h" - -namespace rocksdb { - -/// An abstraction over the "list" concept. -/// All operations may throw a RedisListException -class RedisListIterator { - public: - /// Construct a redis-list-iterator based on data. - /// If the data is non-empty, it must formatted according to @notes above. - /// - /// If the data is valid, we can assume the following invariant(s): - /// a) length_, num_bytes_ are set correctly. - /// b) cur_byte_ always refers to the start of the current element, - /// just before the bytes that specify element length. - /// c) cur_elem_ is always the index of the current element. - /// d) cur_elem_length_ is always the number of bytes in current element, - /// excluding the 4-byte header itself. - /// e) result_ will always contain data_[0..cur_byte_) and a header - /// f) Whenever corrupt data is encountered or an invalid operation is - /// attempted, a RedisListException will immediately be thrown. - explicit RedisListIterator(const std::string& list_data) - : data_(list_data.data()), - num_bytes_(static_cast(list_data.size())), - cur_byte_(0), - cur_elem_(0), - cur_elem_length_(0), - length_(0), - result_() { - // Initialize the result_ (reserve enough space for header) - InitializeResult(); - - // Parse the data only if it is not empty. - if (num_bytes_ == 0) { - return; - } - - // If non-empty, but less than 4 bytes, data must be corrupt - if (num_bytes_ < sizeof(length_)) { - ThrowError("Corrupt header."); // Will break control flow - } - - // Good. The first bytes specify the number of elements - length_ = DecodeFixed32(data_); - cur_byte_ = sizeof(length_); - - // If we have at least one element, point to that element. - // Also, read the first integer of the element (specifying the size), - // if possible. - if (length_ > 0) { - if (cur_byte_ + sizeof(cur_elem_length_) <= num_bytes_) { - cur_elem_length_ = DecodeFixed32(data_+cur_byte_); - } else { - ThrowError("Corrupt data for first element."); - } - } - - // At this point, we are fully set-up. - // The invariants described in the header should now be true. - } - - /// Reserve some space for the result_. - /// Equivalent to result_.reserve(bytes). - void Reserve(int bytes) { - result_.reserve(bytes); - } - - /// Go to next element in data file. - /// Also writes the current element to result_. - RedisListIterator& Push() { - WriteCurrentElement(); - MoveNext(); - return *this; - } - - /// Go to next element in data file. - /// Drops/skips the current element. It will not be written to result_. - RedisListIterator& Skip() { - MoveNext(); - --length_; // One less item - --cur_elem_; // We moved one forward, but index did not change - return *this; - } - - /// Insert elem into the result_ (just BEFORE the current element / byte) - /// Note: if Done() (i.e.: iterator points to end), this will append elem. - void InsertElement(const Slice& elem) { - // Ensure we are in a valid state - CheckErrors(); - - const int kOrigSize = static_cast(result_.size()); - result_.resize(kOrigSize + SizeOf(elem)); - EncodeFixed32(result_.data() + kOrigSize, - static_cast(elem.size())); - memcpy(result_.data() + kOrigSize + sizeof(uint32_t), elem.data(), - elem.size()); - ++length_; - ++cur_elem_; - } - - /// Access the current element, and save the result into *curElem - void GetCurrent(Slice* curElem) { - // Ensure we are in a valid state - CheckErrors(); - - // Ensure that we are not past the last element. - if (Done()) { - ThrowError("Invalid dereferencing."); - } - - // Dereference the element - *curElem = Slice(data_+cur_byte_+sizeof(cur_elem_length_), - cur_elem_length_); - } - - // Number of elements - int Length() const { - return length_; - } - - // Number of bytes in the final representation (i.e: WriteResult().size()) - int Size() const { - // result_ holds the currently written data - // data_[cur_byte..num_bytes-1] is the remainder of the data - return static_cast(result_.size() + (num_bytes_ - cur_byte_)); - } - - // Reached the end? - bool Done() const { - return cur_byte_ >= num_bytes_ || cur_elem_ >= length_; - } - - /// Returns a string representing the final, edited, data. - /// Assumes that all bytes of data_ in the range [0,cur_byte_) have been read - /// and that result_ contains this data. - /// The rest of the data must still be written. - /// So, this method ADVANCES THE ITERATOR TO THE END before writing. - Slice WriteResult() { - CheckErrors(); - - // The header should currently be filled with dummy data (0's) - // Correctly update the header. - // Note, this is safe since result_ is a vector (guaranteed contiguous) - EncodeFixed32(&result_[0],length_); - - // Append the remainder of the data to the result. - result_.insert(result_.end(),data_+cur_byte_, data_ +num_bytes_); - - // Seek to end of file - cur_byte_ = num_bytes_; - cur_elem_ = length_; - cur_elem_length_ = 0; - - // Return the result - return Slice(result_.data(),result_.size()); - } - - public: // Static public functions - - /// An upper-bound on the amount of bytes needed to store this element. - /// This is used to hide representation information from the client. - /// E.G. This can be used to compute the bytes we want to Reserve(). - static uint32_t SizeOf(const Slice& elem) { - // [Integer Length . Data] - return static_cast(sizeof(uint32_t) + elem.size()); - } - - private: // Private functions - - /// Initializes the result_ string. - /// It will fill the first few bytes with 0's so that there is - /// enough space for header information when we need to write later. - /// Currently, "header information" means: the length (number of elements) - /// Assumes that result_ is empty to begin with - void InitializeResult() { - assert(result_.empty()); // Should always be true. - result_.resize(sizeof(uint32_t),0); // Put a block of 0's as the header - } - - /// Go to the next element (used in Push() and Skip()) - void MoveNext() { - CheckErrors(); - - // Check to make sure we are not already in a finished state - if (Done()) { - ThrowError("Attempting to iterate past end of list."); - } - - // Move forward one element. - cur_byte_ += sizeof(cur_elem_length_) + cur_elem_length_; - ++cur_elem_; - - // If we are at the end, finish - if (Done()) { - cur_elem_length_ = 0; - return; - } - - // Otherwise, we should be able to read the new element's length - if (cur_byte_ + sizeof(cur_elem_length_) > num_bytes_) { - ThrowError("Corrupt element data."); - } - - // Set the new element's length - cur_elem_length_ = DecodeFixed32(data_+cur_byte_); - - return; - } - - /// Append the current element (pointed to by cur_byte_) to result_ - /// Assumes result_ has already been reserved appropriately. - void WriteCurrentElement() { - // First verify that the iterator is still valid. - CheckErrors(); - if (Done()) { - ThrowError("Attempting to write invalid element."); - } - - // Append the cur element. - result_.insert(result_.end(), - data_+cur_byte_, - data_+cur_byte_+ sizeof(uint32_t) + cur_elem_length_); - } - - /// Will ThrowError() if necessary. - /// Checks for common/ubiquitous errors that can arise after most operations. - /// This method should be called before any reading operation. - /// If this function succeeds, then we are guaranteed to be in a valid state. - /// Other member functions should check for errors and ThrowError() also - /// if an error occurs that is specific to it even while in a valid state. - void CheckErrors() { - // Check if any crazy thing has happened recently - if ((cur_elem_ > length_) || // Bad index - (cur_byte_ > num_bytes_) || // No more bytes - (cur_byte_ + cur_elem_length_ > num_bytes_) || // Item too large - (cur_byte_ == num_bytes_ && cur_elem_ != length_) || // Too many items - (cur_elem_ == length_ && cur_byte_ != num_bytes_)) { // Too many bytes - ThrowError("Corrupt data."); - } - } - - /// Will throw an exception based on the passed-in message. - /// This function is guaranteed to STOP THE CONTROL-FLOW. - /// (i.e.: you do not have to call "return" after calling ThrowError) - void ThrowError(const char* const /*msg*/ = nullptr) { - // TODO: For now we ignore the msg parameter. This can be expanded later. - throw RedisListException(); - } - - private: - const char* const data_; // A pointer to the data (the first byte) - const uint32_t num_bytes_; // The number of bytes in this list - - uint32_t cur_byte_; // The current byte being read - uint32_t cur_elem_; // The current element being read - uint32_t cur_elem_length_; // The number of bytes in current element - - uint32_t length_; // The number of elements in this list - std::vector result_; // The output data -}; - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/redis/redis_lists.cc b/utilities/redis/redis_lists.cc deleted file mode 100644 index 3ba7470ec..000000000 --- a/utilities/redis/redis_lists.cc +++ /dev/null @@ -1,552 +0,0 @@ -// Copyright 2013 Facebook -/** - * A (persistent) Redis API built using the rocksdb backend. - * Implements Redis Lists as described on: http://redis.io/commands#list - * - * @throws All functions may throw a RedisListException on error/corruption. - * - * @notes Internally, the set of lists is stored in a rocksdb database, - * mapping keys to values. Each "value" is the list itself, storing - * some kind of internal representation of the data. All the - * representation details are handled by the RedisListIterator class. - * The present file should be oblivious to the representation details, - * handling only the client (Redis) API, and the calls to rocksdb. - * - * @TODO Presently, all operations take at least O(NV) time where - * N is the number of elements in the list, and V is the average - * number of bytes per value in the list. So maybe, with merge operator - * we can improve this to an optimal O(V) amortized time, since we - * wouldn't have to read and re-write the entire list. - * - * @author Deon Nicholas (dnicholas@fb.com) - */ - -#ifndef ROCKSDB_LITE -#include "redis_lists.h" - -#include -#include -#include - -#include "rocksdb/slice.h" -#include "util/coding.h" - -namespace rocksdb -{ - -/// Constructors - -RedisLists::RedisLists(const std::string& db_path, - Options options, bool destructive) - : put_option_(), - get_option_() { - - // Store the name of the database - db_name_ = db_path; - - // If destructive, destroy the DB before re-opening it. - if (destructive) { - DestroyDB(db_name_, Options()); - } - - // Now open and deal with the db - DB* db; - Status s = DB::Open(options, db_name_, &db); - if (!s.ok()) { - std::cerr << "ERROR " << s.ToString() << std::endl; - assert(false); - } - - db_ = std::unique_ptr(db); -} - - -/// Accessors - -// Number of elements in the list associated with key -// : throws RedisListException -int RedisLists::Length(const std::string& key) { - // Extract the string data representing the list. - std::string data; - db_->Get(get_option_, key, &data); - - // Return the length - RedisListIterator it(data); - return it.Length(); -} - -// Get the element at the specified index in the (list: key) -// Returns ("") on out-of-bounds -// : throws RedisListException -bool RedisLists::Index(const std::string& key, int32_t index, - std::string* result) { - // Extract the string data representing the list. - std::string data; - db_->Get(get_option_, key, &data); - - // Handle REDIS negative indices (from the end); fast iff Length() takes O(1) - if (index < 0) { - index = Length(key) - (-index); //replace (-i) with (N-i). - } - - // Iterate through the list until the desired index is found. - int curIndex = 0; - RedisListIterator it(data); - while(curIndex < index && !it.Done()) { - ++curIndex; - it.Skip(); - } - - // If we actually found the index - if (curIndex == index && !it.Done()) { - Slice elem; - it.GetCurrent(&elem); - if (result != nullptr) { - *result = elem.ToString(); - } - - return true; - } else { - return false; - } -} - -// Return a truncated version of the list. -// First, negative values for first/last are interpreted as "end of list". -// So, if first == -1, then it is re-set to index: (Length(key) - 1) -// Then, return exactly those indices i such that first <= i <= last. -// : throws RedisListException -std::vector RedisLists::Range(const std::string& key, - int32_t first, int32_t last) { - // Extract the string data representing the list. - std::string data; - db_->Get(get_option_, key, &data); - - // Handle negative bounds (-1 means last element, etc.) - int listLen = Length(key); - if (first < 0) { - first = listLen - (-first); // Replace (-x) with (N-x) - } - if (last < 0) { - last = listLen - (-last); - } - - // Verify bounds (and truncate the range so that it is valid) - first = std::max(first, 0); - last = std::min(last, listLen-1); - int len = std::max(last-first+1, 0); - - // Initialize the resulting list - std::vector result(len); - - // Traverse the list and update the vector - int curIdx = 0; - Slice elem; - for (RedisListIterator it(data); !it.Done() && curIdx<=last; it.Skip()) { - if (first <= curIdx && curIdx <= last) { - it.GetCurrent(&elem); - result[curIdx-first].assign(elem.data(),elem.size()); - } - - ++curIdx; - } - - // Return the result. Might be empty - return result; -} - -// Print the (list: key) out to stdout. For debugging mostly. Public for now. -void RedisLists::Print(const std::string& key) { - // Extract the string data representing the list. - std::string data; - db_->Get(get_option_, key, &data); - - // Iterate through the list and print the items - Slice elem; - for (RedisListIterator it(data); !it.Done(); it.Skip()) { - it.GetCurrent(&elem); - std::cout << "ITEM " << elem.ToString() << std::endl; - } - - //Now print the byte data - RedisListIterator it(data); - std::cout << "==Printing data==" << std::endl; - std::cout << data.size() << std::endl; - std::cout << it.Size() << " " << it.Length() << std::endl; - Slice result = it.WriteResult(); - std::cout << result.data() << std::endl; - if (true) { - std::cout << "size: " << result.size() << std::endl; - const char* val = result.data(); - for(int i=0; i<(int)result.size(); ++i) { - std::cout << (int)val[i] << " " << (val[i]>=32?val[i]:' ') << std::endl; - } - std::cout << std::endl; - } -} - -/// Insert/Update Functions -/// Note: The "real" insert function is private. See below. - -// InsertBefore and InsertAfter are simply wrappers around the Insert function. -int RedisLists::InsertBefore(const std::string& key, const std::string& pivot, - const std::string& value) { - return Insert(key, pivot, value, false); -} - -int RedisLists::InsertAfter(const std::string& key, const std::string& pivot, - const std::string& value) { - return Insert(key, pivot, value, true); -} - -// Prepend value onto beginning of (list: key) -// : throws RedisListException -int RedisLists::PushLeft(const std::string& key, const std::string& value) { - // Get the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Construct the result - RedisListIterator it(data); - it.Reserve(it.Size() + it.SizeOf(value)); - it.InsertElement(value); - - // Push the data back to the db and return the length - db_->Put(put_option_, key, it.WriteResult()); - return it.Length(); -} - -// Append value onto end of (list: key) -// TODO: Make this O(1) time. Might require MergeOperator. -// : throws RedisListException -int RedisLists::PushRight(const std::string& key, const std::string& value) { - // Get the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Create an iterator to the data and seek to the end. - RedisListIterator it(data); - it.Reserve(it.Size() + it.SizeOf(value)); - while (!it.Done()) { - it.Push(); // Write each element as we go - } - - // Insert the new element at the current position (the end) - it.InsertElement(value); - - // Push it back to the db, and return length - db_->Put(put_option_, key, it.WriteResult()); - return it.Length(); -} - -// Set (list: key)[idx] = val. Return true on success, false on fail. -// : throws RedisListException -bool RedisLists::Set(const std::string& key, int32_t index, - const std::string& value) { - // Get the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Handle negative index for REDIS (meaning -index from end of list) - if (index < 0) { - index = Length(key) - (-index); - } - - // Iterate through the list until we find the element we want - int curIndex = 0; - RedisListIterator it(data); - it.Reserve(it.Size() + it.SizeOf(value)); // Over-estimate is fine - while(curIndex < index && !it.Done()) { - it.Push(); - ++curIndex; - } - - // If not found, return false (this occurs when index was invalid) - if (it.Done() || curIndex != index) { - return false; - } - - // Write the new element value, and drop the previous element value - it.InsertElement(value); - it.Skip(); - - // Write the data to the database - // Check status, since it needs to return true/false guarantee - Status s = db_->Put(put_option_, key, it.WriteResult()); - - // Success - return s.ok(); -} - -/// Delete / Remove / Pop functions - -// Trim (list: key) so that it will only contain the indices from start..stop -// Invalid indices will not generate an error, just empty, -// or the portion of the list that fits in this interval -// : throws RedisListException -bool RedisLists::Trim(const std::string& key, int32_t start, int32_t stop) { - // Get the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Handle negative indices in REDIS - int listLen = Length(key); - if (start < 0) { - start = listLen - (-start); - } - if (stop < 0) { - stop = listLen - (-stop); - } - - // Truncate bounds to only fit in the list - start = std::max(start, 0); - stop = std::min(stop, listLen-1); - - // Construct an iterator for the list. Drop all undesired elements. - int curIndex = 0; - RedisListIterator it(data); - it.Reserve(it.Size()); // Over-estimate - while(!it.Done()) { - // If not within the range, just skip the item (drop it). - // Otherwise, continue as usual. - if (start <= curIndex && curIndex <= stop) { - it.Push(); - } else { - it.Skip(); - } - - // Increment the current index - ++curIndex; - } - - // Write the (possibly empty) result to the database - Status s = db_->Put(put_option_, key, it.WriteResult()); - - // Return true as long as the write succeeded - return s.ok(); -} - -// Return and remove the first element in the list (or "" if empty) -// : throws RedisListException -bool RedisLists::PopLeft(const std::string& key, std::string* result) { - // Get the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Point to first element in the list (if it exists), and get its value/size - RedisListIterator it(data); - if (it.Length() > 0) { // Proceed only if list is non-empty - Slice elem; - it.GetCurrent(&elem); // Store the value of the first element - it.Reserve(it.Size() - it.SizeOf(elem)); - it.Skip(); // DROP the first item and move to next - - // Update the db - db_->Put(put_option_, key, it.WriteResult()); - - // Return the value - if (result != nullptr) { - *result = elem.ToString(); - } - return true; - } else { - return false; - } -} - -// Remove and return the last element in the list (or "" if empty) -// TODO: Make this O(1). Might require MergeOperator. -// : throws RedisListException -bool RedisLists::PopRight(const std::string& key, std::string* result) { - // Extract the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Construct an iterator to the data and move to last element - RedisListIterator it(data); - it.Reserve(it.Size()); - int len = it.Length(); - int curIndex = 0; - while(curIndex < (len-1) && !it.Done()) { - it.Push(); - ++curIndex; - } - - // Extract and drop/skip the last element - if (curIndex == len-1) { - assert(!it.Done()); // Sanity check. Should not have ended here. - - // Extract and pop the element - Slice elem; - it.GetCurrent(&elem); // Save value of element. - it.Skip(); // Skip the element - - // Write the result to the database - db_->Put(put_option_, key, it.WriteResult()); - - // Return the value - if (result != nullptr) { - *result = elem.ToString(); - } - return true; - } else { - // Must have been an empty list - assert(it.Done() && len==0 && curIndex == 0); - return false; - } -} - -// Remove the (first or last) "num" occurrences of value in (list: key) -// : throws RedisListException -int RedisLists::Remove(const std::string& key, int32_t num, - const std::string& value) { - // Negative num ==> RemoveLast; Positive num ==> Remove First - if (num < 0) { - return RemoveLast(key, -num, value); - } else if (num > 0) { - return RemoveFirst(key, num, value); - } else { - return RemoveFirst(key, Length(key), value); - } -} - -// Remove the first "num" occurrences of value in (list: key). -// : throws RedisListException -int RedisLists::RemoveFirst(const std::string& key, int32_t num, - const std::string& value) { - // Ensure that the number is positive - assert(num >= 0); - - // Extract the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Traverse the list, appending all but the desired occurrences of value - int numSkipped = 0; // Keep track of the number of times value is seen - Slice elem; - RedisListIterator it(data); - it.Reserve(it.Size()); - while (!it.Done()) { - it.GetCurrent(&elem); - - if (elem == value && numSkipped < num) { - // Drop this item if desired - it.Skip(); - ++numSkipped; - } else { - // Otherwise keep the item and proceed as normal - it.Push(); - } - } - - // Put the result back to the database - db_->Put(put_option_, key, it.WriteResult()); - - // Return the number of elements removed - return numSkipped; -} - - -// Remove the last "num" occurrences of value in (list: key). -// TODO: I traverse the list 2x. Make faster. Might require MergeOperator. -// : throws RedisListException -int RedisLists::RemoveLast(const std::string& key, int32_t num, - const std::string& value) { - // Ensure that the number is positive - assert(num >= 0); - - // Extract the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Temporary variable to hold the "current element" in the blocks below - Slice elem; - - // Count the total number of occurrences of value - int totalOccs = 0; - for (RedisListIterator it(data); !it.Done(); it.Skip()) { - it.GetCurrent(&elem); - if (elem == value) { - ++totalOccs; - } - } - - // Construct an iterator to the data. Reserve enough space for the result. - RedisListIterator it(data); - int bytesRemoved = std::min(num,totalOccs)*it.SizeOf(value); - it.Reserve(it.Size() - bytesRemoved); - - // Traverse the list, appending all but the desired occurrences of value. - // Note: "Drop the last k occurrences" is equivalent to - // "keep only the first n-k occurrences", where n is total occurrences. - int numKept = 0; // Keep track of the number of times value is kept - while(!it.Done()) { - it.GetCurrent(&elem); - - // If we are within the deletion range and equal to value, drop it. - // Otherwise, append/keep/push it. - if (elem == value) { - if (numKept < totalOccs - num) { - it.Push(); - ++numKept; - } else { - it.Skip(); - } - } else { - // Always append the others - it.Push(); - } - } - - // Put the result back to the database - db_->Put(put_option_, key, it.WriteResult()); - - // Return the number of elements removed - return totalOccs - numKept; -} - -/// Private functions - -// Insert element value into (list: key), right before/after -// the first occurrence of pivot -// : throws RedisListException -int RedisLists::Insert(const std::string& key, const std::string& pivot, - const std::string& value, bool insert_after) { - // Get the original list data - std::string data; - db_->Get(get_option_, key, &data); - - // Construct an iterator to the data and reserve enough space for result. - RedisListIterator it(data); - it.Reserve(it.Size() + it.SizeOf(value)); - - // Iterate through the list until we find the element we want - Slice elem; - bool found = false; - while(!it.Done() && !found) { - it.GetCurrent(&elem); - - // When we find the element, insert the element and mark found - if (elem == pivot) { // Found it! - found = true; - if (insert_after == true) { // Skip one more, if inserting after it - it.Push(); - } - it.InsertElement(value); - } else { - it.Push(); - } - - } - - // Put the data (string) into the database - if (found) { - db_->Put(put_option_, key, it.WriteResult()); - } - - // Returns the new (possibly unchanged) length of the list - return it.Length(); -} - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/redis/redis_lists.h b/utilities/redis/redis_lists.h deleted file mode 100644 index 6c8b9551e..000000000 --- a/utilities/redis/redis_lists.h +++ /dev/null @@ -1,108 +0,0 @@ -/** - * A (persistent) Redis API built using the rocksdb backend. - * Implements Redis Lists as described on: http://redis.io/commands#list - * - * @throws All functions may throw a RedisListException - * - * @author Deon Nicholas (dnicholas@fb.com) - * Copyright 2013 Facebook - */ - -#ifndef ROCKSDB_LITE -#pragma once - -#include -#include "rocksdb/db.h" -#include "redis_list_iterator.h" -#include "redis_list_exception.h" - -namespace rocksdb { - -/// The Redis functionality (see http://redis.io/commands#list) -/// All functions may THROW a RedisListException -class RedisLists { - public: // Constructors / Destructors - /// Construct a new RedisLists database, with name/path of db. - /// Will clear the database on open iff destructive is true (default false). - /// Otherwise, it will restore saved changes. - /// May throw RedisListException - RedisLists(const std::string& db_path, - Options options, bool destructive = false); - - public: // Accessors - /// The number of items in (list: key) - int Length(const std::string& key); - - /// Search the list for the (index)'th item (0-based) in (list:key) - /// A negative index indicates: "from end-of-list" - /// If index is within range: return true, and return the value in *result. - /// If (index < -length OR index>=length), then index is out of range: - /// return false (and *result is left unchanged) - /// May throw RedisListException - bool Index(const std::string& key, int32_t index, - std::string* result); - - /// Return (list: key)[first..last] (inclusive) - /// May throw RedisListException - std::vector Range(const std::string& key, - int32_t first, int32_t last); - - /// Prints the entire (list: key), for debugging. - void Print(const std::string& key); - - public: // Insert/Update - /// Insert value before/after pivot in (list: key). Return the length. - /// May throw RedisListException - int InsertBefore(const std::string& key, const std::string& pivot, - const std::string& value); - int InsertAfter(const std::string& key, const std::string& pivot, - const std::string& value); - - /// Push / Insert value at beginning/end of the list. Return the length. - /// May throw RedisListException - int PushLeft(const std::string& key, const std::string& value); - int PushRight(const std::string& key, const std::string& value); - - /// Set (list: key)[idx] = val. Return true on success, false on fail - /// May throw RedisListException - bool Set(const std::string& key, int32_t index, const std::string& value); - - public: // Delete / Remove / Pop / Trim - /// Trim (list: key) so that it will only contain the indices from start..stop - /// Returns true on success - /// May throw RedisListException - bool Trim(const std::string& key, int32_t start, int32_t stop); - - /// If list is empty, return false and leave *result unchanged. - /// Else, remove the first/last elem, store it in *result, and return true - bool PopLeft(const std::string& key, std::string* result); // First - bool PopRight(const std::string& key, std::string* result); // Last - - /// Remove the first (or last) num occurrences of value from the list (key) - /// Return the number of elements removed. - /// May throw RedisListException - int Remove(const std::string& key, int32_t num, - const std::string& value); - int RemoveFirst(const std::string& key, int32_t num, - const std::string& value); - int RemoveLast(const std::string& key, int32_t num, - const std::string& value); - - private: // Private Functions - /// Calls InsertBefore or InsertAfter - int Insert(const std::string& key, const std::string& pivot, - const std::string& value, bool insert_after); - private: - std::string db_name_; // The actual database name/path - WriteOptions put_option_; - ReadOptions get_option_; - - /// The backend rocksdb database. - /// Map : key --> list - /// where a list is a sequence of elements - /// and an element is a 4-byte integer (n), followed by n bytes of data - std::unique_ptr db_; -}; - -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/redis/redis_lists_test.cc b/utilities/redis/redis_lists_test.cc deleted file mode 100644 index 961d87de7..000000000 --- a/utilities/redis/redis_lists_test.cc +++ /dev/null @@ -1,894 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -/** - * A test harness for the Redis API built on rocksdb. - * - * USAGE: Build with: "make redis_test" (in rocksdb directory). - * Run unit tests with: "./redis_test" - * Manual/Interactive user testing: "./redis_test -m" - * Manual user testing + restart database: "./redis_test -m -d" - * - * TODO: Add LARGE random test cases to verify efficiency and scalability - * - * @author Deon Nicholas (dnicholas@fb.com) - */ - -#ifndef ROCKSDB_LITE - -#include -#include - -#include "redis_lists.h" -#include "util/testharness.h" -#include "util/random.h" - -using namespace rocksdb; - -namespace rocksdb { - -class RedisListsTest : public testing::Test { - public: - static const std::string kDefaultDbName; - static Options options; - - RedisListsTest() { - options.create_if_missing = true; - } -}; - -const std::string RedisListsTest::kDefaultDbName = - test::PerThreadDBPath("redis_lists_test"); -Options RedisListsTest::options = Options(); - -// operator== and operator<< are defined below for vectors (lists) -// Needed for ASSERT_EQ - -namespace { -void AssertListEq(const std::vector& result, - const std::vector& expected_result) { - ASSERT_EQ(result.size(), expected_result.size()); - for (size_t i = 0; i < result.size(); ++i) { - ASSERT_EQ(result[i], expected_result[i]); - } -} -} // namespace - -// PushRight, Length, Index, Range -TEST_F(RedisListsTest, SimpleTest) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Simple PushRight (should return the new length each time) - ASSERT_EQ(redis.PushRight("k1", "v1"), 1); - ASSERT_EQ(redis.PushRight("k1", "v2"), 2); - ASSERT_EQ(redis.PushRight("k1", "v3"), 3); - - // Check Length and Index() functions - ASSERT_EQ(redis.Length("k1"), 3); // Check length - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "v1"); // Check valid indices - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "v2"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "v3"); - - // Check range function and vectors - std::vector result = redis.Range("k1", 0, 2); // Get the list - std::vector expected_result(3); - expected_result[0] = "v1"; - expected_result[1] = "v2"; - expected_result[2] = "v3"; - AssertListEq(result, expected_result); -} - -// PushLeft, Length, Index, Range -TEST_F(RedisListsTest, SimpleTest2) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Simple PushRight - ASSERT_EQ(redis.PushLeft("k1", "v3"), 1); - ASSERT_EQ(redis.PushLeft("k1", "v2"), 2); - ASSERT_EQ(redis.PushLeft("k1", "v1"), 3); - - // Check Length and Index() functions - ASSERT_EQ(redis.Length("k1"), 3); // Check length - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "v1"); // Check valid indices - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "v2"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "v3"); - - // Check range function and vectors - std::vector result = redis.Range("k1", 0, 2); // Get the list - std::vector expected_result(3); - expected_result[0] = "v1"; - expected_result[1] = "v2"; - expected_result[2] = "v3"; - AssertListEq(result, expected_result); -} - -// Exhaustive test of the Index() function -TEST_F(RedisListsTest, IndexTest) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Empty Index check (return empty and should not crash or edit tempv) - tempv = "yo"; - ASSERT_TRUE(!redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "yo"); - ASSERT_TRUE(!redis.Index("fda", 3, &tempv)); - ASSERT_EQ(tempv, "yo"); - ASSERT_TRUE(!redis.Index("random", -12391, &tempv)); - ASSERT_EQ(tempv, "yo"); - - // Simple Pushes (will yield: [v6, v4, v4, v1, v2, v3] - redis.PushRight("k1", "v1"); - redis.PushRight("k1", "v2"); - redis.PushRight("k1", "v3"); - redis.PushLeft("k1", "v4"); - redis.PushLeft("k1", "v4"); - redis.PushLeft("k1", "v6"); - - // Simple, non-negative indices - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "v6"); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "v4"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "v4"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "v1"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "v2"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "v3"); - - // Negative indices - ASSERT_TRUE(redis.Index("k1", -6, &tempv)); - ASSERT_EQ(tempv, "v6"); - ASSERT_TRUE(redis.Index("k1", -5, &tempv)); - ASSERT_EQ(tempv, "v4"); - ASSERT_TRUE(redis.Index("k1", -4, &tempv)); - ASSERT_EQ(tempv, "v4"); - ASSERT_TRUE(redis.Index("k1", -3, &tempv)); - ASSERT_EQ(tempv, "v1"); - ASSERT_TRUE(redis.Index("k1", -2, &tempv)); - ASSERT_EQ(tempv, "v2"); - ASSERT_TRUE(redis.Index("k1", -1, &tempv)); - ASSERT_EQ(tempv, "v3"); - - // Out of bounds (return empty, no crash) - ASSERT_TRUE(!redis.Index("k1", 6, &tempv)); - ASSERT_TRUE(!redis.Index("k1", 123219, &tempv)); - ASSERT_TRUE(!redis.Index("k1", -7, &tempv)); - ASSERT_TRUE(!redis.Index("k1", -129, &tempv)); -} - - -// Exhaustive test of the Range() function -TEST_F(RedisListsTest, RangeTest) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Simple Pushes (will yield: [v6, v4, v4, v1, v2, v3]) - redis.PushRight("k1", "v1"); - redis.PushRight("k1", "v2"); - redis.PushRight("k1", "v3"); - redis.PushLeft("k1", "v4"); - redis.PushLeft("k1", "v4"); - redis.PushLeft("k1", "v6"); - - // Sanity check (check the length; make sure it's 6) - ASSERT_EQ(redis.Length("k1"), 6); - - // Simple range - std::vector res = redis.Range("k1", 1, 4); - ASSERT_EQ((int)res.size(), 4); - ASSERT_EQ(res[0], "v4"); - ASSERT_EQ(res[1], "v4"); - ASSERT_EQ(res[2], "v1"); - ASSERT_EQ(res[3], "v2"); - - // Negative indices (i.e.: measured from the end) - res = redis.Range("k1", 2, -1); - ASSERT_EQ((int)res.size(), 4); - ASSERT_EQ(res[0], "v4"); - ASSERT_EQ(res[1], "v1"); - ASSERT_EQ(res[2], "v2"); - ASSERT_EQ(res[3], "v3"); - - res = redis.Range("k1", -6, -4); - ASSERT_EQ((int)res.size(), 3); - ASSERT_EQ(res[0], "v6"); - ASSERT_EQ(res[1], "v4"); - ASSERT_EQ(res[2], "v4"); - - res = redis.Range("k1", -1, 5); - ASSERT_EQ((int)res.size(), 1); - ASSERT_EQ(res[0], "v3"); - - // Partial / Broken indices - res = redis.Range("k1", -3, 1000000); - ASSERT_EQ((int)res.size(), 3); - ASSERT_EQ(res[0], "v1"); - ASSERT_EQ(res[1], "v2"); - ASSERT_EQ(res[2], "v3"); - - res = redis.Range("k1", -1000000, 1); - ASSERT_EQ((int)res.size(), 2); - ASSERT_EQ(res[0], "v6"); - ASSERT_EQ(res[1], "v4"); - - // Invalid indices - res = redis.Range("k1", 7, 9); - ASSERT_EQ((int)res.size(), 0); - - res = redis.Range("k1", -8, -7); - ASSERT_EQ((int)res.size(), 0); - - res = redis.Range("k1", 3, 2); - ASSERT_EQ((int)res.size(), 0); - - res = redis.Range("k1", 5, -2); - ASSERT_EQ((int)res.size(), 0); - - // Range matches Index - res = redis.Range("k1", -6, -4); - ASSERT_TRUE(redis.Index("k1", -6, &tempv)); - ASSERT_EQ(tempv, res[0]); - ASSERT_TRUE(redis.Index("k1", -5, &tempv)); - ASSERT_EQ(tempv, res[1]); - ASSERT_TRUE(redis.Index("k1", -4, &tempv)); - ASSERT_EQ(tempv, res[2]); - - // Last check - res = redis.Range("k1", 0, -6); - ASSERT_EQ((int)res.size(), 1); - ASSERT_EQ(res[0], "v6"); -} - -// Exhaustive test for InsertBefore(), and InsertAfter() -TEST_F(RedisListsTest, InsertTest) { - RedisLists redis(kDefaultDbName, options, true); - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Insert on empty list (return 0, and do not crash) - ASSERT_EQ(redis.InsertBefore("k1", "non-exist", "a"), 0); - ASSERT_EQ(redis.InsertAfter("k1", "other-non-exist", "c"), 0); - ASSERT_EQ(redis.Length("k1"), 0); - - // Push some preliminary stuff [g, f, e, d, c, b, a] - redis.PushLeft("k1", "a"); - redis.PushLeft("k1", "b"); - redis.PushLeft("k1", "c"); - redis.PushLeft("k1", "d"); - redis.PushLeft("k1", "e"); - redis.PushLeft("k1", "f"); - redis.PushLeft("k1", "g"); - ASSERT_EQ(redis.Length("k1"), 7); - - // Test InsertBefore - int newLength = redis.InsertBefore("k1", "e", "hello"); - ASSERT_EQ(newLength, 8); - ASSERT_EQ(redis.Length("k1"), newLength); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "f"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "e"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "hello"); - - // Test InsertAfter - newLength = redis.InsertAfter("k1", "c", "bye"); - ASSERT_EQ(newLength, 9); - ASSERT_EQ(redis.Length("k1"), newLength); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "bye"); - - // Test bad value on InsertBefore - newLength = redis.InsertBefore("k1", "yo", "x"); - ASSERT_EQ(newLength, 9); - ASSERT_EQ(redis.Length("k1"), newLength); - - // Test bad value on InsertAfter - newLength = redis.InsertAfter("k1", "xxxx", "y"); - ASSERT_EQ(newLength, 9); - ASSERT_EQ(redis.Length("k1"), newLength); - - // Test InsertBefore beginning - newLength = redis.InsertBefore("k1", "g", "begggggggggggggggg"); - ASSERT_EQ(newLength, 10); - ASSERT_EQ(redis.Length("k1"), newLength); - - // Test InsertAfter end - newLength = redis.InsertAfter("k1", "a", "enddd"); - ASSERT_EQ(newLength, 11); - ASSERT_EQ(redis.Length("k1"), newLength); - - // Make sure nothing weird happened. - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "begggggggggggggggg"); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "g"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "f"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "hello"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "e"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "d"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "c"); - ASSERT_TRUE(redis.Index("k1", 7, &tempv)); - ASSERT_EQ(tempv, "bye"); - ASSERT_TRUE(redis.Index("k1", 8, &tempv)); - ASSERT_EQ(tempv, "b"); - ASSERT_TRUE(redis.Index("k1", 9, &tempv)); - ASSERT_EQ(tempv, "a"); - ASSERT_TRUE(redis.Index("k1", 10, &tempv)); - ASSERT_EQ(tempv, "enddd"); -} - -// Exhaustive test of Set function -TEST_F(RedisListsTest, SetTest) { - RedisLists redis(kDefaultDbName, options, true); - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Set on empty list (return false, and do not crash) - ASSERT_EQ(redis.Set("k1", 7, "a"), false); - ASSERT_EQ(redis.Set("k1", 0, "a"), false); - ASSERT_EQ(redis.Set("k1", -49, "cx"), false); - ASSERT_EQ(redis.Length("k1"), 0); - - // Push some preliminary stuff [g, f, e, d, c, b, a] - redis.PushLeft("k1", "a"); - redis.PushLeft("k1", "b"); - redis.PushLeft("k1", "c"); - redis.PushLeft("k1", "d"); - redis.PushLeft("k1", "e"); - redis.PushLeft("k1", "f"); - redis.PushLeft("k1", "g"); - ASSERT_EQ(redis.Length("k1"), 7); - - // Test Regular Set - ASSERT_TRUE(redis.Set("k1", 0, "0")); - ASSERT_TRUE(redis.Set("k1", 3, "3")); - ASSERT_TRUE(redis.Set("k1", 6, "6")); - ASSERT_TRUE(redis.Set("k1", 2, "2")); - ASSERT_TRUE(redis.Set("k1", 5, "5")); - ASSERT_TRUE(redis.Set("k1", 1, "1")); - ASSERT_TRUE(redis.Set("k1", 4, "4")); - - ASSERT_EQ(redis.Length("k1"), 7); // Size should not change - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "0"); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "1"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "2"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "3"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "4"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "5"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "6"); - - // Set with negative indices - ASSERT_TRUE(redis.Set("k1", -7, "a")); - ASSERT_TRUE(redis.Set("k1", -4, "d")); - ASSERT_TRUE(redis.Set("k1", -1, "g")); - ASSERT_TRUE(redis.Set("k1", -5, "c")); - ASSERT_TRUE(redis.Set("k1", -2, "f")); - ASSERT_TRUE(redis.Set("k1", -6, "b")); - ASSERT_TRUE(redis.Set("k1", -3, "e")); - - ASSERT_EQ(redis.Length("k1"), 7); // Size should not change - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "a"); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "b"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "c"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "d"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "e"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "f"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "g"); - - // Bad indices (just out-of-bounds / off-by-one check) - ASSERT_EQ(redis.Set("k1", -8, "off-by-one in negative index"), false); - ASSERT_EQ(redis.Set("k1", 7, "off-by-one-error in positive index"), false); - ASSERT_EQ(redis.Set("k1", 43892, "big random index should fail"), false); - ASSERT_EQ(redis.Set("k1", -21391, "large negative index should fail"), false); - - // One last check (to make sure nothing weird happened) - ASSERT_EQ(redis.Length("k1"), 7); // Size should not change - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "a"); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "b"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "c"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "d"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "e"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "f"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "g"); -} - -// Testing Insert, Push, and Set, in a mixed environment -TEST_F(RedisListsTest, InsertPushSetTest) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // A series of pushes and insertions - // Will result in [newbegin, z, a, aftera, x, newend] - // Also, check the return value sometimes (should return length) - int lengthCheck; - lengthCheck = redis.PushLeft("k1", "a"); - ASSERT_EQ(lengthCheck, 1); - redis.PushLeft("k1", "z"); - redis.PushRight("k1", "x"); - lengthCheck = redis.InsertAfter("k1", "a", "aftera"); - ASSERT_EQ(lengthCheck , 4); - redis.InsertBefore("k1", "z", "newbegin"); // InsertBefore beginning of list - redis.InsertAfter("k1", "x", "newend"); // InsertAfter end of list - - // Check - std::vector res = redis.Range("k1", 0, -1); // Get the list - ASSERT_EQ((int)res.size(), 6); - ASSERT_EQ(res[0], "newbegin"); - ASSERT_EQ(res[5], "newend"); - ASSERT_EQ(res[3], "aftera"); - - // Testing duplicate values/pivots (multiple occurrences of 'a') - ASSERT_TRUE(redis.Set("k1", 0, "a")); // [a, z, a, aftera, x, newend] - redis.InsertAfter("k1", "a", "happy"); // [a, happy, z, a, aftera, ...] - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "happy"); - redis.InsertBefore("k1", "a", "sad"); // [sad, a, happy, z, a, aftera, ...] - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "sad"); - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "happy"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "aftera"); - redis.InsertAfter("k1", "a", "zz"); // [sad, a, zz, happy, z, a, aftera, ...] - ASSERT_TRUE(redis.Index("k1", 2, &tempv)); - ASSERT_EQ(tempv, "zz"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "aftera"); - ASSERT_TRUE(redis.Set("k1", 1, "nota")); // [sad, nota, zz, happy, z, a, ...] - redis.InsertBefore("k1", "a", "ba"); // [sad, nota, zz, happy, z, ba, a, ...] - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "z"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "ba"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "a"); - - // We currently have: [sad, nota, zz, happy, z, ba, a, aftera, x, newend] - // redis.Print("k1"); // manually check - - // Test Inserting before/after non-existent values - lengthCheck = redis.Length("k1"); // Ensure that the length doesn't change - ASSERT_EQ(lengthCheck, 10); - ASSERT_EQ(redis.InsertBefore("k1", "non-exist", "randval"), lengthCheck); - ASSERT_EQ(redis.InsertAfter("k1", "nothing", "a"), lengthCheck); - ASSERT_EQ(redis.InsertAfter("randKey", "randVal", "ranValue"), 0); // Empty - ASSERT_EQ(redis.Length("k1"), lengthCheck); // The length should not change - - // Simply Test the Set() function - redis.Set("k1", 5, "ba2"); - redis.InsertBefore("k1", "ba2", "beforeba2"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "z"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "beforeba2"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "ba2"); - ASSERT_TRUE(redis.Index("k1", 7, &tempv)); - ASSERT_EQ(tempv, "a"); - - // We have: [sad, nota, zz, happy, z, beforeba2, ba2, a, aftera, x, newend] - - // Set() with negative indices - redis.Set("k1", -1, "endprank"); - ASSERT_TRUE(!redis.Index("k1", 11, &tempv)); - ASSERT_TRUE(redis.Index("k1", 10, &tempv)); - ASSERT_EQ(tempv, "endprank"); // Ensure Set worked correctly - redis.Set("k1", -11, "t"); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "t"); - - // Test out of bounds Set - ASSERT_EQ(redis.Set("k1", -12, "ssd"), false); - ASSERT_EQ(redis.Set("k1", 11, "sasd"), false); - ASSERT_EQ(redis.Set("k1", 1200, "big"), false); -} - -// Testing Trim, Pop -TEST_F(RedisListsTest, TrimPopTest) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // A series of pushes and insertions - // Will result in [newbegin, z, a, aftera, x, newend] - redis.PushLeft("k1", "a"); - redis.PushLeft("k1", "z"); - redis.PushRight("k1", "x"); - redis.InsertBefore("k1", "z", "newbegin"); // InsertBefore start of list - redis.InsertAfter("k1", "x", "newend"); // InsertAfter end of list - redis.InsertAfter("k1", "a", "aftera"); - - // Simple PopLeft/Right test - ASSERT_TRUE(redis.PopLeft("k1", &tempv)); - ASSERT_EQ(tempv, "newbegin"); - ASSERT_EQ(redis.Length("k1"), 5); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "z"); - ASSERT_TRUE(redis.PopRight("k1", &tempv)); - ASSERT_EQ(tempv, "newend"); - ASSERT_EQ(redis.Length("k1"), 4); - ASSERT_TRUE(redis.Index("k1", -1, &tempv)); - ASSERT_EQ(tempv, "x"); - - // Now have: [z, a, aftera, x] - - // Test Trim - ASSERT_TRUE(redis.Trim("k1", 0, -1)); // [z, a, aftera, x] (do nothing) - ASSERT_EQ(redis.Length("k1"), 4); - ASSERT_TRUE(redis.Trim("k1", 0, 2)); // [z, a, aftera] - ASSERT_EQ(redis.Length("k1"), 3); - ASSERT_TRUE(redis.Index("k1", -1, &tempv)); - ASSERT_EQ(tempv, "aftera"); - ASSERT_TRUE(redis.Trim("k1", 1, 1)); // [a] - ASSERT_EQ(redis.Length("k1"), 1); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "a"); - - // Test out of bounds (empty) trim - ASSERT_TRUE(redis.Trim("k1", 1, 0)); - ASSERT_EQ(redis.Length("k1"), 0); - - // Popping with empty list (return empty without error) - ASSERT_TRUE(!redis.PopLeft("k1", &tempv)); - ASSERT_TRUE(!redis.PopRight("k1", &tempv)); - ASSERT_TRUE(redis.Trim("k1", 0, 5)); - - // Exhaustive Trim test (negative and invalid indices) - // Will start in [newbegin, z, a, aftera, x, newend] - redis.PushLeft("k1", "a"); - redis.PushLeft("k1", "z"); - redis.PushRight("k1", "x"); - redis.InsertBefore("k1", "z", "newbegin"); // InsertBefore start of list - redis.InsertAfter("k1", "x", "newend"); // InsertAfter end of list - redis.InsertAfter("k1", "a", "aftera"); - ASSERT_TRUE(redis.Trim("k1", -6, -1)); // Should do nothing - ASSERT_EQ(redis.Length("k1"), 6); - ASSERT_TRUE(redis.Trim("k1", 1, -2)); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "z"); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "x"); - ASSERT_EQ(redis.Length("k1"), 4); - ASSERT_TRUE(redis.Trim("k1", -3, -2)); - ASSERT_EQ(redis.Length("k1"), 2); -} - -// Testing Remove, RemoveFirst, RemoveLast -TEST_F(RedisListsTest, RemoveTest) { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // A series of pushes and insertions - // Will result in [newbegin, z, a, aftera, x, newend, a, a] - redis.PushLeft("k1", "a"); - redis.PushLeft("k1", "z"); - redis.PushRight("k1", "x"); - redis.InsertBefore("k1", "z", "newbegin"); // InsertBefore start of list - redis.InsertAfter("k1", "x", "newend"); // InsertAfter end of list - redis.InsertAfter("k1", "a", "aftera"); - redis.PushRight("k1", "a"); - redis.PushRight("k1", "a"); - - // Verify - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "newbegin"); - ASSERT_TRUE(redis.Index("k1", -1, &tempv)); - ASSERT_EQ(tempv, "a"); - - // Check RemoveFirst (Remove the first two 'a') - // Results in [newbegin, z, aftera, x, newend, a] - int numRemoved = redis.Remove("k1", 2, "a"); - ASSERT_EQ(numRemoved, 2); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "newbegin"); - ASSERT_TRUE(redis.Index("k1", 1, &tempv)); - ASSERT_EQ(tempv, "z"); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "newend"); - ASSERT_TRUE(redis.Index("k1", 5, &tempv)); - ASSERT_EQ(tempv, "a"); - ASSERT_EQ(redis.Length("k1"), 6); - - // Repopulate some stuff - // Results in: [x, x, x, x, x, newbegin, z, x, aftera, x, newend, a, x] - redis.PushLeft("k1", "x"); - redis.PushLeft("k1", "x"); - redis.PushLeft("k1", "x"); - redis.PushLeft("k1", "x"); - redis.PushLeft("k1", "x"); - redis.PushRight("k1", "x"); - redis.InsertAfter("k1", "z", "x"); - - // Test removal from end - numRemoved = redis.Remove("k1", -2, "x"); - ASSERT_EQ(numRemoved, 2); - ASSERT_TRUE(redis.Index("k1", 8, &tempv)); - ASSERT_EQ(tempv, "aftera"); - ASSERT_TRUE(redis.Index("k1", 9, &tempv)); - ASSERT_EQ(tempv, "newend"); - ASSERT_TRUE(redis.Index("k1", 10, &tempv)); - ASSERT_EQ(tempv, "a"); - ASSERT_TRUE(!redis.Index("k1", 11, &tempv)); - numRemoved = redis.Remove("k1", -2, "x"); - ASSERT_EQ(numRemoved, 2); - ASSERT_TRUE(redis.Index("k1", 4, &tempv)); - ASSERT_EQ(tempv, "newbegin"); - ASSERT_TRUE(redis.Index("k1", 6, &tempv)); - ASSERT_EQ(tempv, "aftera"); - - // We now have: [x, x, x, x, newbegin, z, aftera, newend, a] - ASSERT_EQ(redis.Length("k1"), 9); - ASSERT_TRUE(redis.Index("k1", -1, &tempv)); - ASSERT_EQ(tempv, "a"); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "x"); - - // Test over-shooting (removing more than there exists) - numRemoved = redis.Remove("k1", -9000, "x"); - ASSERT_EQ(numRemoved , 4); // Only really removed 4 - ASSERT_EQ(redis.Length("k1"), 5); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "newbegin"); - numRemoved = redis.Remove("k1", 1, "x"); - ASSERT_EQ(numRemoved, 0); - - // Try removing ALL! - numRemoved = redis.Remove("k1", 0, "newbegin"); // REMOVE 0 will remove all! - ASSERT_EQ(numRemoved, 1); - - // Removal from an empty-list - ASSERT_TRUE(redis.Trim("k1", 1, 0)); - numRemoved = redis.Remove("k1", 1, "z"); - ASSERT_EQ(numRemoved, 0); -} - - -// Test Multiple keys and Persistence -TEST_F(RedisListsTest, PersistenceMultiKeyTest) { - std::string tempv; // Used below for all Index(), PopRight(), PopLeft() - - // Block one: populate a single key in the database - { - RedisLists redis(kDefaultDbName, options, true); // Destructive - - // A series of pushes and insertions - // Will result in [newbegin, z, a, aftera, x, newend, a, a] - redis.PushLeft("k1", "a"); - redis.PushLeft("k1", "z"); - redis.PushRight("k1", "x"); - redis.InsertBefore("k1", "z", "newbegin"); // InsertBefore start of list - redis.InsertAfter("k1", "x", "newend"); // InsertAfter end of list - redis.InsertAfter("k1", "a", "aftera"); - redis.PushRight("k1", "a"); - redis.PushRight("k1", "a"); - - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "aftera"); - } - - // Block two: make sure changes were saved and add some other key - { - RedisLists redis(kDefaultDbName, options, false); // Persistent, non-destructive - - // Check - ASSERT_EQ(redis.Length("k1"), 8); - ASSERT_TRUE(redis.Index("k1", 3, &tempv)); - ASSERT_EQ(tempv, "aftera"); - - redis.PushRight("k2", "randomkey"); - redis.PushLeft("k2", "sas"); - - redis.PopLeft("k1", &tempv); - } - - // Block three: Verify the changes from block 2 - { - RedisLists redis(kDefaultDbName, options, false); // Persistent, non-destructive - - // Check - ASSERT_EQ(redis.Length("k1"), 7); - ASSERT_EQ(redis.Length("k2"), 2); - ASSERT_TRUE(redis.Index("k1", 0, &tempv)); - ASSERT_EQ(tempv, "z"); - ASSERT_TRUE(redis.Index("k2", -2, &tempv)); - ASSERT_EQ(tempv, "sas"); - } -} - -/// THE manual REDIS TEST begins here -/// THIS WILL ONLY OCCUR IF YOU RUN: ./redis_test -m - -namespace { -void MakeUpper(std::string* const s) { - int len = static_cast(s->length()); - for (int i = 0; i < len; ++i) { - (*s)[i] = static_cast(toupper((*s)[i])); // C-version defined in - } -} - -/// Allows the user to enter in REDIS commands into the command-line. -/// This is useful for manual / interacticve testing / debugging. -/// Use destructive=true to clean the database before use. -/// Use destructive=false to remember the previous state (i.e.: persistent) -/// Should be called from main function. -int manual_redis_test(bool destructive){ - RedisLists redis(RedisListsTest::kDefaultDbName, - RedisListsTest::options, - destructive); - - // TODO: Right now, please use spaces to separate each word. - // In actual redis, you can use quotes to specify compound values - // Example: RPUSH mylist "this is a compound value" - - std::string command; - while(true) { - std::cin >> command; - MakeUpper(&command); - - if (command == "LINSERT") { - std::string k, t, p, v; - std::cin >> k >> t >> p >> v; - MakeUpper(&t); - if (t=="BEFORE") { - std::cout << redis.InsertBefore(k, p, v) << std::endl; - } else if (t=="AFTER") { - std::cout << redis.InsertAfter(k, p, v) << std::endl; - } - } else if (command == "LPUSH") { - std::string k, v; - std::cin >> k >> v; - redis.PushLeft(k, v); - } else if (command == "RPUSH") { - std::string k, v; - std::cin >> k >> v; - redis.PushRight(k, v); - } else if (command == "LPOP") { - std::string k; - std::cin >> k; - std::string res; - redis.PopLeft(k, &res); - std::cout << res << std::endl; - } else if (command == "RPOP") { - std::string k; - std::cin >> k; - std::string res; - redis.PopRight(k, &res); - std::cout << res << std::endl; - } else if (command == "LREM") { - std::string k; - int amt; - std::string v; - - std::cin >> k >> amt >> v; - std::cout << redis.Remove(k, amt, v) << std::endl; - } else if (command == "LLEN") { - std::string k; - std::cin >> k; - std::cout << redis.Length(k) << std::endl; - } else if (command == "LRANGE") { - std::string k; - int i, j; - std::cin >> k >> i >> j; - std::vector res = redis.Range(k, i, j); - for (auto it = res.begin(); it != res.end(); ++it) { - std::cout << " " << (*it); - } - std::cout << std::endl; - } else if (command == "LTRIM") { - std::string k; - int i, j; - std::cin >> k >> i >> j; - redis.Trim(k, i, j); - } else if (command == "LSET") { - std::string k; - int idx; - std::string v; - std::cin >> k >> idx >> v; - redis.Set(k, idx, v); - } else if (command == "LINDEX") { - std::string k; - int idx; - std::cin >> k >> idx; - std::string res; - redis.Index(k, idx, &res); - std::cout << res << std::endl; - } else if (command == "PRINT") { // Added by Deon - std::string k; - std::cin >> k; - redis.Print(k); - } else if (command == "QUIT") { - return 0; - } else { - std::cout << "unknown command: " << command << std::endl; - } - } -} -} // namespace - -} // namespace rocksdb - - -// USAGE: "./redis_test" for default (unit tests) -// "./redis_test -m" for manual testing (redis command api) -// "./redis_test -m -d" for destructive manual test (erase db before use) - - -namespace { -// Check for "want" argument in the argument list -bool found_arg(int argc, char* argv[], const char* want){ - for(int i=1; i - -int main(int /*argc*/, char** /*argv*/) { - fprintf(stderr, "SKIPPED as redis is not supported in ROCKSDB_LITE\n"); - return 0; -} - -#endif // !ROCKSDB_LITE diff --git a/utilities/spatialdb/spatial_db.cc b/utilities/spatialdb/spatial_db.cc deleted file mode 100644 index b34976eb8..000000000 --- a/utilities/spatialdb/spatial_db.cc +++ /dev/null @@ -1,919 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE - -#include "rocksdb/utilities/spatial_db.h" - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rocksdb/cache.h" -#include "rocksdb/options.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "rocksdb/db.h" -#include "rocksdb/utilities/stackable_db.h" -#include "util/coding.h" -#include "utilities/spatialdb/utils.h" -#include "port/port.h" - -namespace rocksdb { -namespace spatial { - -// Column families are used to store element's data and spatial indexes. We use -// [default] column family to store the element data. This is the format of -// [default] column family: -// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set -// (serialized) -// We have one additional column family for each spatial index. The name of the -// column family is [spatial$]. The format is: -// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" -// We store information about indexes in [metadata] column family. Format is: -// * spatial$ -> bbox (4 double encodings) tile_bits -// (varint32) - -namespace { -const std::string kMetadataColumnFamilyName("metadata"); -inline std::string GetSpatialIndexColumnFamilyName( - const std::string& spatial_index_name) { - return "spatial$" + spatial_index_name; -} -inline bool GetSpatialIndexName(const std::string& column_family_name, - Slice* dst) { - *dst = Slice(column_family_name); - if (dst->starts_with("spatial$")) { - dst->remove_prefix(8); // strlen("spatial$") - return true; - } - return false; -} - -} // namespace - -void Variant::Init(const Variant& v, Data& d) { - switch (v.type_) { - case kNull: - break; - case kBool: - d.b = v.data_.b; - break; - case kInt: - d.i = v.data_.i; - break; - case kDouble: - d.d = v.data_.d; - break; - case kString: - new (d.s) std::string(*GetStringPtr(v.data_)); - break; - default: - assert(false); - } -} - -Variant& Variant::operator=(const Variant& v) { - // Construct first a temp so exception from a string ctor - // does not change this object - Data tmp; - Init(v, tmp); - - Type thisType = type_; - // Boils down to copying bits so safe - std::swap(tmp, data_); - type_ = v.type_; - - Destroy(thisType, tmp); - - return *this; -} - -Variant& Variant::operator=(Variant&& rhs) { - Destroy(type_, data_); - if (rhs.type_ == kString) { - new (data_.s) std::string(std::move(*GetStringPtr(rhs.data_))); - } else { - data_ = rhs.data_; - } - type_ = rhs.type_; - rhs.type_ = kNull; - return *this; -} - -bool Variant::operator==(const Variant& rhs) const { - if (type_ != rhs.type_) { - return false; - } - - switch (type_) { - case kNull: - return true; - case kBool: - return data_.b == rhs.data_.b; - case kInt: - return data_.i == rhs.data_.i; - case kDouble: - return data_.d == rhs.data_.d; - case kString: - return *GetStringPtr(data_) == *GetStringPtr(rhs.data_); - default: - assert(false); - } - // it will never reach here, but otherwise the compiler complains - return false; -} - -FeatureSet* FeatureSet::Set(const std::string& key, const Variant& value) { - map_.insert({key, value}); - return this; -} - -bool FeatureSet::Contains(const std::string& key) const { - return map_.find(key) != map_.end(); -} - -const Variant& FeatureSet::Get(const std::string& key) const { - auto itr = map_.find(key); - assert(itr != map_.end()); - return itr->second; -} - -FeatureSet::iterator FeatureSet::Find(const std::string& key) const { - return iterator(map_.find(key)); -} - -void FeatureSet::Clear() { map_.clear(); } - -void FeatureSet::Serialize(std::string* output) const { - for (const auto& iter : map_) { - PutLengthPrefixedSlice(output, iter.first); - output->push_back(static_cast(iter.second.type())); - switch (iter.second.type()) { - case Variant::kNull: - break; - case Variant::kBool: - output->push_back(static_cast(iter.second.get_bool())); - break; - case Variant::kInt: - PutVarint64(output, iter.second.get_int()); - break; - case Variant::kDouble: { - PutDouble(output, iter.second.get_double()); - break; - } - case Variant::kString: - PutLengthPrefixedSlice(output, iter.second.get_string()); - break; - default: - assert(false); - } - } -} - -bool FeatureSet::Deserialize(const Slice& input) { - assert(map_.empty()); - Slice s(input); - while (s.size()) { - Slice key; - if (!GetLengthPrefixedSlice(&s, &key) || s.size() == 0) { - return false; - } - char type = s[0]; - s.remove_prefix(1); - switch (type) { - case Variant::kNull: { - map_.insert({key.ToString(), Variant()}); - break; - } - case Variant::kBool: { - if (s.size() == 0) { - return false; - } - map_.insert({key.ToString(), Variant(static_cast(s[0]))}); - s.remove_prefix(1); - break; - } - case Variant::kInt: { - uint64_t v; - if (!GetVarint64(&s, &v)) { - return false; - } - map_.insert({key.ToString(), Variant(v)}); - break; - } - case Variant::kDouble: { - double d; - if (!GetDouble(&s, &d)) { - return false; - } - map_.insert({key.ToString(), Variant(d)}); - break; - } - case Variant::kString: { - Slice str; - if (!GetLengthPrefixedSlice(&s, &str)) { - return false; - } - map_.insert({key.ToString(), str.ToString()}); - break; - } - default: - return false; - } - } - return true; -} - -std::string FeatureSet::DebugString() const { - std::string out = "{"; - bool comma = false; - for (const auto& iter : map_) { - if (comma) { - out.append(", "); - } else { - comma = true; - } - out.append("\"" + iter.first + "\": "); - switch (iter.second.type()) { - case Variant::kNull: - out.append("null"); - break; - case Variant::kBool: - if (iter.second.get_bool()) { - out.append("true"); - } else { - out.append("false"); - } - break; - case Variant::kInt: { - char buf[32]; - snprintf(buf, sizeof(buf), "%" PRIu64, iter.second.get_int()); - out.append(buf); - break; - } - case Variant::kDouble: { - char buf[32]; - snprintf(buf, sizeof(buf), "%lf", iter.second.get_double()); - out.append(buf); - break; - } - case Variant::kString: - out.append("\"" + iter.second.get_string() + "\""); - break; - default: - assert(false); - } - } - return out + "}"; -} - -class ValueGetter { - public: - ValueGetter() {} - virtual ~ValueGetter() {} - - virtual bool Get(uint64_t id) = 0; - virtual const Slice value() const = 0; - - virtual Status status() const = 0; -}; - -class ValueGetterFromDB : public ValueGetter { - public: - ValueGetterFromDB(DB* db, ColumnFamilyHandle* cf) : db_(db), cf_(cf) {} - - virtual bool Get(uint64_t id) override { - std::string encoded_id; - PutFixed64BigEndian(&encoded_id, id); - status_ = db_->Get(ReadOptions(), cf_, encoded_id, &value_); - if (status_.IsNotFound()) { - status_ = Status::Corruption("Index inconsistency"); - return false; - } - - return true; - } - - virtual const Slice value() const override { return value_; } - - virtual Status status() const override { return status_; } - - private: - std::string value_; - DB* db_; - ColumnFamilyHandle* cf_; - Status status_; -}; - -class ValueGetterFromIterator : public ValueGetter { - public: - explicit ValueGetterFromIterator(Iterator* iterator) : iterator_(iterator) {} - - virtual bool Get(uint64_t id) override { - std::string encoded_id; - PutFixed64BigEndian(&encoded_id, id); - iterator_->Seek(encoded_id); - - if (!iterator_->Valid() || iterator_->key() != Slice(encoded_id)) { - status_ = Status::Corruption("Index inconsistency"); - return false; - } - - return true; - } - - virtual const Slice value() const override { return iterator_->value(); } - - virtual Status status() const override { return status_; } - - private: - std::unique_ptr iterator_; - Status status_; -}; - -class SpatialIndexCursor : public Cursor { - public: - // tile_box is inclusive - SpatialIndexCursor(Iterator* spatial_iterator, ValueGetter* value_getter, - const BoundingBox& tile_bbox, uint32_t tile_bits) - : value_getter_(value_getter), valid_(true) { - // calculate quad keys we'll need to query - std::vector quad_keys; - quad_keys.reserve(static_cast((tile_bbox.max_x - tile_bbox.min_x + 1) * - (tile_bbox.max_y - tile_bbox.min_y + 1))); - for (uint64_t x = tile_bbox.min_x; x <= tile_bbox.max_x; ++x) { - for (uint64_t y = tile_bbox.min_y; y <= tile_bbox.max_y; ++y) { - quad_keys.push_back(GetQuadKeyFromTile(x, y, tile_bits)); - } - } - std::sort(quad_keys.begin(), quad_keys.end()); - - // load primary key ids for all quad keys - for (auto quad_key : quad_keys) { - std::string encoded_quad_key; - PutFixed64BigEndian(&encoded_quad_key, quad_key); - Slice slice_quad_key(encoded_quad_key); - - // If CheckQuadKey is true, there is no need to reseek, since - // spatial_iterator is already pointing at the correct quad key. This is - // an optimization. - if (!CheckQuadKey(spatial_iterator, slice_quad_key)) { - spatial_iterator->Seek(slice_quad_key); - } - - while (CheckQuadKey(spatial_iterator, slice_quad_key)) { - // extract ID from spatial_iterator - uint64_t id; - bool ok = GetFixed64BigEndian( - Slice(spatial_iterator->key().data() + sizeof(uint64_t), - sizeof(uint64_t)), - &id); - if (!ok) { - valid_ = false; - status_ = Status::Corruption("Spatial index corruption"); - break; - } - primary_key_ids_.insert(id); - spatial_iterator->Next(); - } - } - - if (!spatial_iterator->status().ok()) { - status_ = spatial_iterator->status(); - valid_ = false; - } - delete spatial_iterator; - - valid_ = valid_ && !primary_key_ids_.empty(); - - if (valid_) { - primary_keys_iterator_ = primary_key_ids_.begin(); - ExtractData(); - } - } - - virtual bool Valid() const override { return valid_; } - - virtual void Next() override { - assert(valid_); - - ++primary_keys_iterator_; - if (primary_keys_iterator_ == primary_key_ids_.end()) { - valid_ = false; - return; - } - - ExtractData(); - } - - virtual const Slice blob() override { return current_blob_; } - virtual const FeatureSet& feature_set() override { - return current_feature_set_; - } - - virtual Status status() const override { - if (!status_.ok()) { - return status_; - } - return value_getter_->status(); - } - - private: - // * returns true if spatial iterator is on the current quad key and all is - // well - // * returns false if spatial iterator is not on current, or iterator is - // invalid or corruption - bool CheckQuadKey(Iterator* spatial_iterator, const Slice& quad_key) { - if (!spatial_iterator->Valid()) { - return false; - } - if (spatial_iterator->key().size() != 2 * sizeof(uint64_t)) { - status_ = Status::Corruption("Invalid spatial index key"); - valid_ = false; - return false; - } - Slice spatial_iterator_quad_key(spatial_iterator->key().data(), - sizeof(uint64_t)); - if (spatial_iterator_quad_key != quad_key) { - // caller needs to reseek - return false; - } - // if we come to here, we have found the quad key - return true; - } - - void ExtractData() { - assert(valid_); - valid_ = value_getter_->Get(*primary_keys_iterator_); - - if (valid_) { - Slice data = value_getter_->value(); - current_feature_set_.Clear(); - if (!GetLengthPrefixedSlice(&data, ¤t_blob_) || - !current_feature_set_.Deserialize(data)) { - status_ = Status::Corruption("Primary key column family corruption"); - valid_ = false; - } - } - - } - - std::unique_ptr value_getter_; - bool valid_; - Status status_; - - FeatureSet current_feature_set_; - Slice current_blob_; - - // This is loaded from spatial iterator. - std::unordered_set primary_key_ids_; - std::unordered_set::iterator primary_keys_iterator_; -}; - -class ErrorCursor : public Cursor { - public: - explicit ErrorCursor(Status s) : s_(s) { assert(!s.ok()); } - virtual Status status() const override { return s_; } - virtual bool Valid() const override { return false; } - virtual void Next() override { assert(false); } - - virtual const Slice blob() override { - assert(false); - return Slice(); - } - virtual const FeatureSet& feature_set() override { - assert(false); - // compiler complains otherwise - return trash_; - } - - private: - Status s_; - FeatureSet trash_; -}; - -class SpatialDBImpl : public SpatialDB { - public: - // * db -- base DB that needs to be forwarded to StackableDB - // * data_column_family -- column family used to store the data - // * spatial_indexes -- a list of spatial indexes together with column - // families that correspond to those spatial indexes - // * next_id -- next ID in auto-incrementing ID. This is usually - // `max_id_currenty_in_db + 1` - SpatialDBImpl( - DB* db, ColumnFamilyHandle* data_column_family, - const std::vector>& - spatial_indexes, - uint64_t next_id, bool read_only) - : SpatialDB(db), - data_column_family_(data_column_family), - next_id_(next_id), - read_only_(read_only) { - for (const auto& index : spatial_indexes) { - name_to_index_.insert( - {index.first.name, IndexColumnFamily(index.first, index.second)}); - } - } - - ~SpatialDBImpl() { - for (auto& iter : name_to_index_) { - delete iter.second.column_family; - } - delete data_column_family_; - } - - virtual Status Insert( - const WriteOptions& write_options, const BoundingBox& bbox, - const Slice& blob, const FeatureSet& feature_set, - const std::vector& spatial_indexes) override { - WriteBatch batch; - - if (spatial_indexes.size() == 0) { - return Status::InvalidArgument("Spatial indexes can't be empty"); - } - - const size_t kWriteOutEveryBytes = 1024 * 1024; // 1MB - uint64_t id = next_id_.fetch_add(1); - - for (const auto& si : spatial_indexes) { - auto itr = name_to_index_.find(si); - if (itr == name_to_index_.end()) { - return Status::InvalidArgument("Can't find index " + si); - } - const auto& spatial_index = itr->second.index; - if (!spatial_index.bbox.Intersects(bbox)) { - continue; - } - BoundingBox tile_bbox = GetTileBoundingBox(spatial_index, bbox); - - for (uint64_t x = tile_bbox.min_x; x <= tile_bbox.max_x; ++x) { - for (uint64_t y = tile_bbox.min_y; y <= tile_bbox.max_y; ++y) { - // see above for format - std::string key; - PutFixed64BigEndian( - &key, GetQuadKeyFromTile(x, y, spatial_index.tile_bits)); - PutFixed64BigEndian(&key, id); - batch.Put(itr->second.column_family, key, Slice()); - if (batch.GetDataSize() >= kWriteOutEveryBytes) { - Status s = Write(write_options, &batch); - batch.Clear(); - if (!s.ok()) { - return s; - } - } - } - } - } - - // see above for format - std::string data_key; - PutFixed64BigEndian(&data_key, id); - std::string data_value; - PutLengthPrefixedSlice(&data_value, blob); - feature_set.Serialize(&data_value); - batch.Put(data_column_family_, data_key, data_value); - - return Write(write_options, &batch); - } - - virtual Status Compact(int num_threads) override { - std::vector column_families; - column_families.push_back(data_column_family_); - - for (auto& iter : name_to_index_) { - column_families.push_back(iter.second.column_family); - } - - std::mutex state_mutex; - std::condition_variable cv; - Status s; - int threads_running = 0; - - std::vector threads; - - for (auto cfh : column_families) { - threads.emplace_back([&, cfh] { - { - std::unique_lock lk(state_mutex); - cv.wait(lk, [&] { return threads_running < num_threads; }); - threads_running++; - } - - Status t = Flush(FlushOptions(), cfh); - if (t.ok()) { - t = CompactRange(CompactRangeOptions(), cfh, nullptr, nullptr); - } - - { - std::unique_lock lk(state_mutex); - threads_running--; - if (s.ok() && !t.ok()) { - s = t; - } - cv.notify_one(); - } - }); - } - - for (auto& t : threads) { - t.join(); - } - - return s; - } - - virtual Cursor* Query(const ReadOptions& read_options, - const BoundingBox& bbox, - const std::string& spatial_index) override { - auto itr = name_to_index_.find(spatial_index); - if (itr == name_to_index_.end()) { - return new ErrorCursor(Status::InvalidArgument( - "Spatial index " + spatial_index + " not found")); - } - const auto& si = itr->second.index; - Iterator* spatial_iterator; - ValueGetter* value_getter; - - if (read_only_) { - spatial_iterator = NewIterator(read_options, itr->second.column_family); - value_getter = new ValueGetterFromDB(this, data_column_family_); - } else { - std::vector iterators; - Status s = NewIterators(read_options, - {data_column_family_, itr->second.column_family}, - &iterators); - if (!s.ok()) { - return new ErrorCursor(s); - } - - spatial_iterator = iterators[1]; - value_getter = new ValueGetterFromIterator(iterators[0]); - } - return new SpatialIndexCursor(spatial_iterator, value_getter, - GetTileBoundingBox(si, bbox), si.tile_bits); - } - - private: - ColumnFamilyHandle* data_column_family_; - struct IndexColumnFamily { - SpatialIndexOptions index; - ColumnFamilyHandle* column_family; - IndexColumnFamily(const SpatialIndexOptions& _index, - ColumnFamilyHandle* _cf) - : index(_index), column_family(_cf) {} - }; - // constant after construction! - std::unordered_map name_to_index_; - - std::atomic next_id_; - bool read_only_; -}; - -namespace { -DBOptions GetDBOptionsFromSpatialDBOptions(const SpatialDBOptions& options) { - DBOptions db_options; - db_options.max_open_files = 50000; - db_options.max_background_compactions = 3 * options.num_threads / 4; - db_options.max_background_flushes = - options.num_threads - db_options.max_background_compactions; - db_options.env->SetBackgroundThreads(db_options.max_background_compactions, - Env::LOW); - db_options.env->SetBackgroundThreads(db_options.max_background_flushes, - Env::HIGH); - db_options.statistics = CreateDBStatistics(); - if (options.bulk_load) { - db_options.stats_dump_period_sec = 600; - } else { - db_options.stats_dump_period_sec = 1800; // 30min - } - return db_options; -} - -ColumnFamilyOptions GetColumnFamilyOptions(const SpatialDBOptions& /*options*/, - std::shared_ptr block_cache) { - ColumnFamilyOptions column_family_options; - column_family_options.write_buffer_size = 128 * 1024 * 1024; // 128MB - column_family_options.max_write_buffer_number = 4; - column_family_options.max_bytes_for_level_base = 256 * 1024 * 1024; // 256MB - column_family_options.target_file_size_base = 64 * 1024 * 1024; // 64MB - column_family_options.level0_file_num_compaction_trigger = 2; - column_family_options.level0_slowdown_writes_trigger = 16; - column_family_options.level0_stop_writes_trigger = 32; - // only compress levels >= 2 - column_family_options.compression_per_level.resize( - column_family_options.num_levels); - for (int i = 0; i < column_family_options.num_levels; ++i) { - if (i < 2) { - column_family_options.compression_per_level[i] = kNoCompression; - } else { - column_family_options.compression_per_level[i] = kLZ4Compression; - } - } - BlockBasedTableOptions table_options; - table_options.block_cache = block_cache; - column_family_options.table_factory.reset( - NewBlockBasedTableFactory(table_options)); - return column_family_options; -} - -ColumnFamilyOptions OptimizeOptionsForDataColumnFamily( - ColumnFamilyOptions options, std::shared_ptr block_cache) { - options.prefix_extractor.reset(NewNoopTransform()); - BlockBasedTableOptions block_based_options; - block_based_options.index_type = BlockBasedTableOptions::kHashSearch; - block_based_options.block_cache = block_cache; - options.table_factory.reset(NewBlockBasedTableFactory(block_based_options)); - return options; -} - -} // namespace - -class MetadataStorage { - public: - MetadataStorage(DB* db, ColumnFamilyHandle* cf) : db_(db), cf_(cf) {} - ~MetadataStorage() {} - - // format: - // - Status AddIndex(const SpatialIndexOptions& index) { - std::string encoded_index; - PutDouble(&encoded_index, index.bbox.min_x); - PutDouble(&encoded_index, index.bbox.min_y); - PutDouble(&encoded_index, index.bbox.max_x); - PutDouble(&encoded_index, index.bbox.max_y); - PutVarint32(&encoded_index, index.tile_bits); - return db_->Put(WriteOptions(), cf_, - GetSpatialIndexColumnFamilyName(index.name), encoded_index); - } - - Status GetIndex(const std::string& name, SpatialIndexOptions* dst) { - std::string value; - Status s = db_->Get(ReadOptions(), cf_, - GetSpatialIndexColumnFamilyName(name), &value); - if (!s.ok()) { - return s; - } - dst->name = name; - Slice encoded_index(value); - bool ok = GetDouble(&encoded_index, &(dst->bbox.min_x)); - ok = ok && GetDouble(&encoded_index, &(dst->bbox.min_y)); - ok = ok && GetDouble(&encoded_index, &(dst->bbox.max_x)); - ok = ok && GetDouble(&encoded_index, &(dst->bbox.max_y)); - ok = ok && GetVarint32(&encoded_index, &(dst->tile_bits)); - return ok ? Status::OK() : Status::Corruption("Index encoding corrupted"); - } - - private: - DB* db_; - ColumnFamilyHandle* cf_; -}; - -Status SpatialDB::Create( - const SpatialDBOptions& options, const std::string& name, - const std::vector& spatial_indexes) { - DBOptions db_options = GetDBOptionsFromSpatialDBOptions(options); - db_options.create_if_missing = true; - db_options.create_missing_column_families = true; - db_options.error_if_exists = true; - - auto block_cache = NewLRUCache(static_cast(options.cache_size)); - ColumnFamilyOptions column_family_options = - GetColumnFamilyOptions(options, block_cache); - - std::vector column_families; - column_families.push_back(ColumnFamilyDescriptor( - kDefaultColumnFamilyName, - OptimizeOptionsForDataColumnFamily(column_family_options, block_cache))); - column_families.push_back( - ColumnFamilyDescriptor(kMetadataColumnFamilyName, column_family_options)); - - for (const auto& index : spatial_indexes) { - column_families.emplace_back(GetSpatialIndexColumnFamilyName(index.name), - column_family_options); - } - - std::vector handles; - DB* base_db; - Status s = DB::Open(db_options, name, column_families, &handles, &base_db); - if (!s.ok()) { - return s; - } - MetadataStorage metadata(base_db, handles[1]); - for (const auto& index : spatial_indexes) { - s = metadata.AddIndex(index); - if (!s.ok()) { - break; - } - } - - for (auto h : handles) { - delete h; - } - delete base_db; - - return s; -} - -Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, - SpatialDB** db, bool read_only) { - DBOptions db_options = GetDBOptionsFromSpatialDBOptions(options); - auto block_cache = NewLRUCache(static_cast(options.cache_size)); - ColumnFamilyOptions column_family_options = - GetColumnFamilyOptions(options, block_cache); - - Status s; - std::vector existing_column_families; - std::vector spatial_indexes; - s = DB::ListColumnFamilies(db_options, name, &existing_column_families); - if (!s.ok()) { - return s; - } - for (const auto& cf_name : existing_column_families) { - Slice spatial_index; - if (GetSpatialIndexName(cf_name, &spatial_index)) { - spatial_indexes.emplace_back(spatial_index.data(), spatial_index.size()); - } - } - - std::vector column_families; - column_families.push_back(ColumnFamilyDescriptor( - kDefaultColumnFamilyName, - OptimizeOptionsForDataColumnFamily(column_family_options, block_cache))); - column_families.push_back( - ColumnFamilyDescriptor(kMetadataColumnFamilyName, column_family_options)); - - for (const auto& index : spatial_indexes) { - column_families.emplace_back(GetSpatialIndexColumnFamilyName(index), - column_family_options); - } - std::vector handles; - DB* base_db; - if (read_only) { - s = DB::OpenForReadOnly(db_options, name, column_families, &handles, - &base_db); - } else { - s = DB::Open(db_options, name, column_families, &handles, &base_db); - } - if (!s.ok()) { - return s; - } - - MetadataStorage metadata(base_db, handles[1]); - - std::vector> index_cf; - assert(handles.size() == spatial_indexes.size() + 2); - for (size_t i = 0; i < spatial_indexes.size(); ++i) { - SpatialIndexOptions index_options; - s = metadata.GetIndex(spatial_indexes[i], &index_options); - if (!s.ok()) { - break; - } - index_cf.emplace_back(index_options, handles[i + 2]); - } - uint64_t next_id = 1; - if (s.ok()) { - // find next_id - Iterator* iter = base_db->NewIterator(ReadOptions(), handles[0]); - iter->SeekToLast(); - if (iter->Valid()) { - uint64_t last_id = 0; - if (!GetFixed64BigEndian(iter->key(), &last_id)) { - s = Status::Corruption("Invalid key in data column family"); - } else { - next_id = last_id + 1; - } - } - delete iter; - } - if (!s.ok()) { - for (auto h : handles) { - delete h; - } - delete base_db; - return s; - } - - // I don't need metadata column family any more, so delete it - delete handles[1]; - *db = new SpatialDBImpl(base_db, handles[0], index_cf, next_id, read_only); - return Status::OK(); -} - -} // namespace spatial -} // namespace rocksdb -#endif // ROCKSDB_LITE diff --git a/utilities/spatialdb/spatial_db_test.cc b/utilities/spatialdb/spatial_db_test.cc deleted file mode 100644 index cb92af8b1..000000000 --- a/utilities/spatialdb/spatial_db_test.cc +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE - -#include -#include -#include - -#include "rocksdb/utilities/spatial_db.h" -#include "util/compression.h" -#include "util/testharness.h" -#include "util/testutil.h" -#include "util/random.h" - -namespace rocksdb { -namespace spatial { - -class SpatialDBTest : public testing::Test { - public: - SpatialDBTest() { - dbname_ = test::PerThreadDBPath("spatial_db_test"); - DestroyDB(dbname_, Options()); - } - - void AssertCursorResults(BoundingBox bbox, const std::string& index, - const std::vector& blobs) { - Cursor* c = db_->Query(ReadOptions(), bbox, index); - ASSERT_OK(c->status()); - std::multiset b; - for (auto x : blobs) { - b.insert(x); - } - - while (c->Valid()) { - auto itr = b.find(c->blob().ToString()); - ASSERT_TRUE(itr != b.end()); - b.erase(itr); - c->Next(); - } - ASSERT_EQ(b.size(), 0U); - ASSERT_OK(c->status()); - delete c; - } - - std::string dbname_; - SpatialDB* db_; -}; - -TEST_F(SpatialDBTest, FeatureSetSerializeTest) { - if (!LZ4_Supported()) { - return; - } - FeatureSet fs; - - fs.Set("a", std::string("b")); - fs.Set("x", static_cast(3)); - fs.Set("y", false); - fs.Set("n", Variant()); // null - fs.Set("m", 3.25); - - ASSERT_TRUE(fs.Find("w") == fs.end()); - ASSERT_TRUE(fs.Find("x") != fs.end()); - ASSERT_TRUE((*fs.Find("x")).second == Variant(static_cast(3))); - ASSERT_TRUE((*fs.Find("y")).second != Variant(true)); - std::set keys({"a", "x", "y", "n", "m"}); - for (const auto& x : fs) { - ASSERT_TRUE(keys.find(x.first) != keys.end()); - keys.erase(x.first); - } - ASSERT_EQ(keys.size(), 0U); - - std::string serialized; - fs.Serialize(&serialized); - - FeatureSet deserialized; - ASSERT_TRUE(deserialized.Deserialize(serialized)); - - ASSERT_TRUE(deserialized.Contains("a")); - ASSERT_EQ(deserialized.Get("a").type(), Variant::kString); - ASSERT_EQ(deserialized.Get("a").get_string(), "b"); - ASSERT_TRUE(deserialized.Contains("x")); - ASSERT_EQ(deserialized.Get("x").type(), Variant::kInt); - ASSERT_EQ(deserialized.Get("x").get_int(), static_cast(3)); - ASSERT_TRUE(deserialized.Contains("y")); - ASSERT_EQ(deserialized.Get("y").type(), Variant::kBool); - ASSERT_EQ(deserialized.Get("y").get_bool(), false); - ASSERT_TRUE(deserialized.Contains("n")); - ASSERT_EQ(deserialized.Get("n").type(), Variant::kNull); - ASSERT_TRUE(deserialized.Contains("m")); - ASSERT_EQ(deserialized.Get("m").type(), Variant::kDouble); - ASSERT_EQ(deserialized.Get("m").get_double(), 3.25); - - // corrupted serialization - serialized = serialized.substr(0, serialized.size() - 1); - deserialized.Clear(); - ASSERT_TRUE(!deserialized.Deserialize(serialized)); -} - -TEST_F(SpatialDBTest, TestNextID) { - if (!LZ4_Supported()) { - return; - } - ASSERT_OK(SpatialDB::Create( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)})); - - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), - "one", FeatureSet(), {"simple"})); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(10, 10, 15, 15), - "two", FeatureSet(), {"simple"})); - delete db_; - db_ = nullptr; - - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); - assert(db_ != nullptr); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(55, 55, 65, 65), - "three", FeatureSet(), {"simple"})); - delete db_; - - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); - AssertCursorResults(BoundingBox(0, 0, 100, 100), "simple", - {"one", "two", "three"}); - delete db_; -} - -TEST_F(SpatialDBTest, FeatureSetTest) { - if (!LZ4_Supported()) { - return; - } - ASSERT_OK(SpatialDB::Create( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)})); - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); - - FeatureSet fs; - fs.Set("a", std::string("b")); - fs.Set("c", std::string("d")); - - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), - "one", fs, {"simple"})); - - Cursor* c = - db_->Query(ReadOptions(), BoundingBox(5, 5, 10, 10), "simple"); - - ASSERT_TRUE(c->Valid()); - ASSERT_EQ(c->blob().compare("one"), 0); - FeatureSet returned = c->feature_set(); - ASSERT_TRUE(returned.Contains("a")); - ASSERT_TRUE(!returned.Contains("b")); - ASSERT_TRUE(returned.Contains("c")); - ASSERT_EQ(returned.Get("a").type(), Variant::kString); - ASSERT_EQ(returned.Get("a").get_string(), "b"); - ASSERT_EQ(returned.Get("c").type(), Variant::kString); - ASSERT_EQ(returned.Get("c").get_string(), "d"); - - c->Next(); - ASSERT_TRUE(!c->Valid()); - - delete c; - delete db_; -} - -TEST_F(SpatialDBTest, SimpleTest) { - if (!LZ4_Supported()) { - return; - } - // iter 0 -- not read only - // iter 1 -- read only - for (int iter = 0; iter < 2; ++iter) { - DestroyDB(dbname_, Options()); - ASSERT_OK(SpatialDB::Create( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), - 3)})); - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); - assert(db_ != nullptr); - - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(33, 17, 63, 79), - "one", FeatureSet(), {"index"})); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(65, 65, 111, 111), - "two", FeatureSet(), {"index"})); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(1, 49, 127, 63), - "three", FeatureSet(), {"index"})); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(20, 100, 21, 101), - "four", FeatureSet(), {"index"})); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(81, 33, 127, 63), - "five", FeatureSet(), {"index"})); - ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(1, 65, 47, 95), - "six", FeatureSet(), {"index"})); - - if (iter == 1) { - delete db_; - db_ = nullptr; - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_, true)); - } - - AssertCursorResults(BoundingBox(33, 17, 47, 31), "index", {"one"}); - AssertCursorResults(BoundingBox(17, 33, 79, 63), "index", - {"one", "three"}); - AssertCursorResults(BoundingBox(17, 81, 63, 111), "index", - {"four", "six"}); - AssertCursorResults(BoundingBox(85, 86, 85, 86), "index", {"two"}); - AssertCursorResults(BoundingBox(33, 1, 127, 111), "index", - {"one", "two", "three", "five", "six"}); - // even though the bounding box doesn't intersect, we got "four" back - // because - // it's in the same tile - AssertCursorResults(BoundingBox(18, 98, 19, 99), "index", {"four"}); - AssertCursorResults(BoundingBox(130, 130, 131, 131), "index", {}); - AssertCursorResults(BoundingBox(81, 17, 127, 31), "index", {}); - AssertCursorResults(BoundingBox(90, 50, 91, 51), "index", - {"three", "five"}); - - delete db_; - db_ = nullptr; - } -} - -namespace { -std::string RandomStr(Random* rnd) { - std::string r; - for (int k = 0; k < 10; ++k) { - r.push_back(static_cast(rnd->Uniform(26)) + 'a'); - } - return r; -} - -BoundingBox RandomBoundingBox(int limit, Random* rnd, int max_size) { - BoundingBox r; - r.min_x = rnd->Uniform(limit - 1); - r.min_y = rnd->Uniform(limit - 1); - r.max_x = r.min_x + rnd->Uniform(std::min(limit - 1 - r.min_x, max_size)) + 1; - r.max_y = r.min_y + rnd->Uniform(std::min(limit - 1 - r.min_y, max_size)) + 1; - return r; -} - -BoundingBox ScaleBB(BoundingBox b, double step) { - return BoundingBox(b.min_x * step + 1, b.min_y * step + 1, - (b.max_x + 1) * step - 1, - (b.max_y + 1) * step - 1); -} - -} // namespace - -TEST_F(SpatialDBTest, RandomizedTest) { - if (!LZ4_Supported()) { - return; - } - Random rnd(301); - std::vector>> elements; - - BoundingBox spatial_index_bounds(0, 0, (1LL << 32), (1LL << 32)); - ASSERT_OK(SpatialDB::Create( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("index", spatial_index_bounds, 7)})); - ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); - double step = (1LL << 32) / (1 << 7); - - for (int i = 0; i < 1000; ++i) { - std::string blob = RandomStr(&rnd); - BoundingBox bbox = RandomBoundingBox(128, &rnd, 10); - ASSERT_OK(db_->Insert(WriteOptions(), ScaleBB(bbox, step), blob, - FeatureSet(), {"index"})); - elements.push_back(make_pair(blob, bbox)); - } - - // parallel - db_->Compact(2); - // serial - db_->Compact(1); - - for (int i = 0; i < 1000; ++i) { - BoundingBox int_bbox = RandomBoundingBox(128, &rnd, 10); - BoundingBox double_bbox = ScaleBB(int_bbox, step); - std::vector blobs; - for (auto e : elements) { - if (e.second.Intersects(int_bbox)) { - blobs.push_back(e.first); - } - } - AssertCursorResults(double_bbox, "index", blobs); - } - - delete db_; -} - -} // namespace spatial -} // namespace rocksdb - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -#else -#include - -int main(int /*argc*/, char** /*argv*/) { - fprintf(stderr, "SKIPPED as SpatialDB is not supported in ROCKSDB_LITE\n"); - return 0; -} - -#endif // !ROCKSDB_LITE diff --git a/utilities/spatialdb/utils.h b/utilities/spatialdb/utils.h deleted file mode 100644 index fe4b4e253..000000000 --- a/utilities/spatialdb/utils.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#include -#include - -#include "rocksdb/utilities/spatial_db.h" - -namespace rocksdb { -namespace spatial { - -// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx -inline uint64_t GetTileFromCoord(double x, double start, double end, - uint32_t tile_bits) { - if (x < start) { - return 0; - } - uint64_t tiles = 1ull << tile_bits; - uint64_t r = static_cast(((x - start) / (end - start)) * tiles); - return std::min(r, tiles - 1); -} - -inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, - uint32_t tile_bits) { - uint64_t quad_key = 0; - for (uint32_t i = 0; i < tile_bits; ++i) { - uint64_t mask = (1ull << i); - quad_key |= (tile_x & mask) << i; - quad_key |= (tile_y & mask) << (i + 1); - } - return quad_key; -} - -inline BoundingBox GetTileBoundingBox( - const SpatialIndexOptions& spatial_index, BoundingBox bbox) { - return BoundingBox( - GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, - spatial_index.bbox.max_x, spatial_index.tile_bits), - GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, - spatial_index.bbox.max_y, spatial_index.tile_bits), - GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, - spatial_index.bbox.max_x, spatial_index.tile_bits), - GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, - spatial_index.bbox.max_y, spatial_index.tile_bits)); -} - -// big endian can be compared using memcpy -inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { - char buf[sizeof(value)]; - buf[0] = (value >> 56) & 0xff; - buf[1] = (value >> 48) & 0xff; - buf[2] = (value >> 40) & 0xff; - buf[3] = (value >> 32) & 0xff; - buf[4] = (value >> 24) & 0xff; - buf[5] = (value >> 16) & 0xff; - buf[6] = (value >> 8) & 0xff; - buf[7] = value & 0xff; - dst->append(buf, sizeof(buf)); -} - -// big endian can be compared using memcpy -inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { - if (input.size() < sizeof(uint64_t)) { - return false; - } - auto ptr = input.data(); - *value = (static_cast(static_cast(ptr[0])) << 56) | - (static_cast(static_cast(ptr[1])) << 48) | - (static_cast(static_cast(ptr[2])) << 40) | - (static_cast(static_cast(ptr[3])) << 32) | - (static_cast(static_cast(ptr[4])) << 24) | - (static_cast(static_cast(ptr[5])) << 16) | - (static_cast(static_cast(ptr[6])) << 8) | - static_cast(static_cast(ptr[7])); - return true; -} - -inline void PutDouble(std::string* dst, double d) { - dst->append(reinterpret_cast(&d), sizeof(double)); -} - -inline bool GetDouble(Slice* input, double* d) { - if (input->size() < sizeof(double)) { - return false; - } - memcpy(d, input->data(), sizeof(double)); - input->remove_prefix(sizeof(double)); - return true; -} - -} // namespace spatial -} // namespace rocksdb