diff --git a/Makefile b/Makefile index 1dd4299a8..ad0f5d310 100644 --- a/Makefile +++ b/Makefile @@ -107,6 +107,7 @@ TESTS = \ backupable_db_test \ document_db_test \ json_document_test \ + spatial_db_test \ version_edit_test \ version_set_test \ file_indexer_test \ @@ -359,6 +360,9 @@ document_db_test: utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARN json_document_test: utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +spatial_db_test: utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) + ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) diff --git a/include/rocksdb/utilities/spatial_db.h b/include/rocksdb/utilities/spatial_db.h new file mode 100644 index 000000000..d5f994132 --- /dev/null +++ b/include/rocksdb/utilities/spatial_db.h @@ -0,0 +1,231 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once +#ifndef ROCKSDB_LITE + +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/stackable_db.h" + +namespace rocksdb { +namespace spatial { + +// NOTE: SpatialDB is experimental and we might change its API without warning. +// Please talk to us before developing against SpatialDB API. +// +// SpatialDB is a support for spatial indexes built on top of RocksDB. +// When creating a new SpatialDB, clients specifies a list of spatial indexes to +// build on their data. Each spatial index is defined by the area and +// granularity. If you're storing map data, different spatial index +// granularities can be used for different zoom levels. +// +// Each element inserted into SpatialDB has: +// * a bounding box, which determines how will the element be indexed +// * string blob, which will usually be WKB representation of the polygon +// (http://en.wikipedia.org/wiki/Well-known_text) +// * feature set, which is a map of key-value pairs, where value can be null, +// int, double, bool, string +// * a list of indexes to insert the element in +// +// Each query is executed on a single spatial index. Query guarantees that it +// will return all elements intersecting the specified bounding box, but it +// might also return some extra non-intersecting elements. + +// Variant is a class that can be many things: null, bool, int, double or string +// It is used to store different value types in FeatureSet (see below) +struct Variant { + // Don't change the values here, they are persisted on disk + enum Type { + kNull = 0x0, + kBool = 0x1, + kInt = 0x2, + kDouble = 0x3, + kString = 0x4, + }; + + Variant() : type_(kNull) {} + /* implicit */ Variant(bool b) : type_(kBool) { data_.b = b; } + /* implicit */ Variant(uint64_t i) : type_(kInt) { data_.i = i; } + /* implicit */ Variant(double d) : type_(kDouble) { data_.d = d; } + /* implicit */ Variant(const std::string& s) : type_(kString) { + new (&data_.s) std::string(s); + } + + Variant(const Variant& v); + + ~Variant() { + if (type_ == kString) { + using std::string; + (&data_.s)->~string(); + } + } + + Type type() const { return type_; } + bool get_bool() const { return data_.b; } + uint64_t get_int() const { return data_.i; } + double get_double() const { return data_.d; } + const std::string& get_string() const { return data_.s; } + + bool operator==(const Variant& other); + bool operator!=(const Variant& other); + + private: + Type type_; + union Data { + Data() {} + ~Data() {} + bool b; + uint64_t i; + double d; + std::string s; + } data_; +}; + +// FeatureSet is a map of key-value pairs. One feature set is associated with +// each element in SpatialDB. It can be used to add rich data about the element. +class FeatureSet { + private: + typedef std::unordered_map map; + + public: + class iterator { + public: + /* implicit */ iterator(const map::const_iterator itr) : itr_(itr) {} + iterator& operator++() { + ++itr_; + return *this; + } + bool operator!=(const iterator& other) { return itr_ != other.itr_; } + bool operator==(const iterator& other) { return itr_ == other.itr_; } + map::value_type operator*() { return *itr_; } + + private: + map::const_iterator itr_; + }; + FeatureSet() = default; + + FeatureSet* Set(const std::string& key, const Variant& value); + bool Contains(const std::string& key) const; + // REQUIRES: Contains(key) + const Variant& Get(const std::string& key) const; + iterator Find(const std::string& key) const; + + iterator begin() const { return map_.begin(); } + iterator end() const { return map_.end(); } + + void Clear(); + + void Serialize(std::string* output) const; + // REQUIRED: empty FeatureSet + bool Deserialize(const Slice& input); + + private: + map map_; +}; + +// BoundingBox is a helper structure for defining rectangles representing +// bounding boxes of spatial elements. +template +struct BoundingBox { + T min_x, min_y, max_x, max_y; + BoundingBox() = default; + BoundingBox(T _min_x, T _min_y, T _max_x, T _max_y) + : min_x(_min_x), min_y(_min_y), max_x(_max_x), max_y(_max_y) {} + + bool Intersects(const BoundingBox& a) const { + return !(min_x > a.max_x || min_y > a.max_y || a.min_x > max_x || + a.min_y > max_y); + } +}; + +struct SpatialDBOptions { + uint64_t cache_size = 1 * 1024 * 1024 * 1024LL; // 1GB + int num_threads = 16; + bool bulk_load = true; +}; + +// Cursor is used to return data from the query to the client. To get all the +// data from the query, just call Next() while Valid() is true +class Cursor { + public: + Cursor() = default; + virtual ~Cursor() {} + + virtual bool Valid() const = 0; + // REQUIRES: Valid() + virtual void Next() = 0; + + // Lifetime of the underlying storage until the next call to Next() + // REQUIRES: Valid() + virtual const Slice blob() = 0; + // Lifetime of the underlying storage until the next call to Next() + // REQUIRES: Valid() + virtual const FeatureSet& feature_set() = 0; + + virtual Status status() const = 0; + + private: + // No copying allowed + Cursor(const Cursor&); + void operator=(const Cursor&); +}; + +// SpatialIndexOptions defines a spatial index that will be built on the data +struct SpatialIndexOptions { + // Spatial indexes are referenced by names + std::string name; + // An area that is indexed. If the element is not intersecting with spatial + // index's bbox, it will not be inserted into the index + BoundingBox bbox; + // tile_bits control the granularity of the spatial index. Each dimension of + // the bbox will be split into (1 << tile_bits) tiles, so there will be a + // total of (1 << tile_bits)^2 tiles. It is recommended to configure a size of + // each tile to be approximately the size of the query on that spatial index + uint32_t tile_bits; + SpatialIndexOptions() {} + SpatialIndexOptions(const std::string& _name, + const BoundingBox& _bbox, uint32_t _tile_bits) + : name(_name), bbox(_bbox), tile_bits(_tile_bits) {} +}; + +class SpatialDB : public StackableDB { + public: + // Open the SpatialDB. List of spatial_indexes need to include all indexes + // that already exist in the DB (if the DB already exists). It can include new + // indexes, which will be created and initialized as empty (data will not be + // re-indexed). The resulting db object will be returned through db parameter. + // TODO(icanadi) read_only = true doesn't yet work because of #4743185 + static Status Open(const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes, + SpatialDB** db, bool read_only = false); + + explicit SpatialDB(DB* db) : StackableDB(db) {} + + // Insert the element into the DB. Element will be inserted into specified + // spatial_indexes, based on specified bbox. + // REQUIRES: spatial_indexes.size() > 0 + virtual Status Insert(const WriteOptions& write_options, + const BoundingBox& bbox, const Slice& blob, + const FeatureSet& feature_set, + const std::vector& spatial_indexes) = 0; + + // Calling Compact() after inserting a bunch of elements should speed up + // reading. This is especially useful if you use SpatialDBOptions::bulk_load + virtual Status Compact() = 0; + + // Query the specified spatial_index. Query will return all elements that + // intersect bbox, but it may also return some extra elements. + virtual Cursor* Query(const ReadOptions& read_options, + const BoundingBox& bbox, + const std::string& spatial_index) = 0; +}; + +} // namespace spatial +} // namespace rocksdb +#endif // ROCKSDB_LITE diff --git a/utilities/spatialdb/spatial_db.cc b/utilities/spatialdb/spatial_db.cc new file mode 100644 index 000000000..4f75d48c9 --- /dev/null +++ b/utilities/spatialdb/spatial_db.cc @@ -0,0 +1,640 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#ifndef ROCKSDB_LITE + +#include +#include +#include +#include + +#include "rocksdb/cache.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/stackable_db.h" +#include "rocksdb/utilities/spatial_db.h" +#include "util/coding.h" + +namespace rocksdb { +namespace spatial { + +Variant::Variant(const Variant& v) : type_(v.type_) { + switch (v.type_) { + case kNull: + break; + case kBool: + data_.b = v.data_.b; + break; + case kInt: + data_.i = v.data_.i; + break; + case kDouble: + data_.d = v.data_.d; + break; + case kString: + new (&data_.s) std::string(v.data_.s); + break; + default: + assert(false); + } +} + +bool Variant::operator==(const Variant& rhs) { + if (type_ != rhs.type_) { + return false; + } + + switch (type_) { + case kNull: + return true; + case kBool: + return data_.b == rhs.data_.b; + case kInt: + return data_.i == rhs.data_.i; + case kDouble: + return data_.d == rhs.data_.d; + case kString: + return data_.s == rhs.data_.s; + default: + assert(false); + } +} + +bool Variant::operator!=(const Variant& rhs) { return !(*this == rhs); } + +FeatureSet* FeatureSet::Set(const std::string& key, const Variant& value) { + map_.insert({key, value}); + return this; +} + +bool FeatureSet::Contains(const std::string& key) const { + return map_.find(key) != map_.end(); +} + +const Variant& FeatureSet::Get(const std::string& key) const { + auto itr = map_.find(key); + assert(itr != map_.end()); + return itr->second; +} + +FeatureSet::iterator FeatureSet::Find(const std::string& key) const { + return iterator(map_.find(key)); +} + +void FeatureSet::Clear() { map_.clear(); } + +void FeatureSet::Serialize(std::string* output) const { + for (const auto& iter : map_) { + PutLengthPrefixedSlice(output, iter.first); + output->push_back(static_cast(iter.second.type())); + switch (iter.second.type()) { + case Variant::kNull: + break; + case Variant::kBool: + output->push_back(static_cast(iter.second.get_bool())); + break; + case Variant::kInt: + PutVarint64(output, iter.second.get_int()); + break; + case Variant::kDouble: { + double d = iter.second.get_double(); + output->append(reinterpret_cast(&d), sizeof(double)); + break; + } + case Variant::kString: + PutLengthPrefixedSlice(output, iter.second.get_string()); + break; + default: + assert(false); + } + } +} + +bool FeatureSet::Deserialize(const Slice& input) { + assert(map_.empty()); + Slice s(input); + while (s.size()) { + Slice key; + if (!GetLengthPrefixedSlice(&s, &key) || s.size() == 0) { + return false; + } + char type = s[0]; + s.remove_prefix(1); + switch (type) { + case Variant::kNull: { + map_.insert({key.ToString(), Variant()}); + break; + } + case Variant::kBool: { + if (s.size() == 0) { + return false; + } + map_.insert({key.ToString(), Variant(static_cast(s[0]))}); + s.remove_prefix(1); + break; + } + case Variant::kInt: { + uint64_t v; + if (!GetVarint64(&s, &v)) { + return false; + } + map_.insert({key.ToString(), Variant(v)}); + break; + } + case Variant::kDouble: { + if (s.size() < sizeof(double)) { + return false; + } + double d; + memcpy(&d, s.data(), sizeof(double)); + map_.insert({key.ToString(), Variant(d)}); + s.remove_prefix(sizeof(double)); + break; + } + case Variant::kString: { + Slice str; + if (!GetLengthPrefixedSlice(&s, &str)) { + return false; + } + map_.insert({key.ToString(), str.ToString()}); + break; + } + default: + return false; + } + } + return true; +} + +namespace { +// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx +inline uint64_t GetTileFromCoord(double x, double start, double end, + uint32_t tile_bits) { + if (x < start) { + return 0; + } + uint64_t tiles = static_cast(1) << tile_bits; + uint64_t r = ((x - start) / (end - start)) * tiles; + return std::min(r, tiles - 1); +} +inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, + uint32_t tile_bits) { + uint64_t quad_key = 0; + for (uint32_t i = 0; i < tile_bits; ++i) { + uint32_t mask = (1LL << i); + quad_key |= (tile_x & mask) << i; + quad_key |= (tile_y & mask) << (i + 1); + } + return quad_key; +} +inline BoundingBox GetTileBoundingBox( + const SpatialIndexOptions& spatial_index, BoundingBox bbox) { + return BoundingBox( + GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits)); +} + +// big endian can be compared using memcpy +inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { + char buf[sizeof(value)]; + buf[0] = (value >> 56) & 0xff; + buf[1] = (value >> 48) & 0xff; + buf[2] = (value >> 40) & 0xff; + buf[3] = (value >> 32) & 0xff; + buf[4] = (value >> 24) & 0xff; + buf[5] = (value >> 16) & 0xff; + buf[6] = (value >> 8) & 0xff; + buf[7] = value & 0xff; + dst->append(buf, sizeof(buf)); +} +// big endian can be compared using memcpy +inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { + if (input.size() < sizeof(uint64_t)) { + return false; + } + auto ptr = input.data(); + *value = (static_cast(static_cast(ptr[0])) << 56) | + (static_cast(static_cast(ptr[1])) << 48) | + (static_cast(static_cast(ptr[2])) << 40) | + (static_cast(static_cast(ptr[3])) << 32) | + (static_cast(static_cast(ptr[4])) << 24) | + (static_cast(static_cast(ptr[5])) << 16) | + (static_cast(static_cast(ptr[6])) << 8) | + static_cast(static_cast(ptr[7])); + return true; +} + +} // namespace + +class SpatialIndexCursor : public Cursor { + public: + SpatialIndexCursor(Iterator* spatial_iterator, Iterator* data_iterator, + const BoundingBox& tile_bbox, uint32_t tile_bits) + : spatial_iterator_(spatial_iterator), + data_iterator_(data_iterator), + tile_bbox_(tile_bbox), + tile_bits_(tile_bits), + valid_(true) { + current_x_ = tile_bbox.min_x; + current_y_ = tile_bbox.min_y; + UpdateQuadKey(); + ReSeek(); + if (valid_) { + // this is the first ID returned, so I don't care about return value of + // Dedup + Dedup(); + } + if (valid_) { + ExtractData(); + } + } + + virtual bool Valid() const override { return valid_; } + + virtual void Next() override { + assert(valid_); + + // this do-while loop deals only with deduplication + do { + spatial_iterator_->Next(); + if (ExtractID()) { + // OK, found what we needed + continue; + } + + // move to the next tile + Increment(); + + if (ExtractID()) { + // no need to reseek, found what we needed + continue; + } + // reseek, find next good tile + ReSeek(); + } while (valid_ && !Dedup() && valid_); + + if (valid_) { + ExtractData(); + } + } + + virtual const Slice blob() override { return current_blob_; } + virtual const FeatureSet& feature_set() override { + return current_feature_set_; + } + + virtual Status status() const override { + if (!status_.ok()) { + return status_; + } + if (!spatial_iterator_->status().ok()) { + return spatial_iterator_->status(); + } + return data_iterator_->status(); + } + + private: + // returns true if OK, false if already returned (duplicate) + bool Dedup() { + assert(valid_); + uint64_t id; + bool ok = GetFixed64BigEndian(current_id_, &id); + if (!ok) { + valid_ = false; + status_ = Status::Corruption("Spatial index corruption"); + return false; + } + if (returned_ids_.find(id) != returned_ids_.end()) { + return false; + } + returned_ids_.insert(id); + return true; + } + void ReSeek() { + while (valid_) { + spatial_iterator_->Seek(current_quad_key_); + if (ExtractID()) { + // found what we're looking for! + break; + } + Increment(); + } + } + + void Increment() { + ++current_x_; + if (current_x_ > tile_bbox_.max_x) { + current_x_ = tile_bbox_.min_x; + ++current_y_; + } + if (current_y_ > tile_bbox_.max_y) { + valid_ = false; + } else { + UpdateQuadKey(); + } + } + void UpdateQuadKey() { + current_quad_key_.clear(); + PutFixed64BigEndian(¤t_quad_key_, + GetQuadKeyFromTile(current_x_, current_y_, tile_bits_)); + } + // * returns true if spatial iterator is on the current quad key and all is + // well. Caller will call Next() to get new data + // * returns false if spatial iterator is not on current, or invalid or status + // bad. Caller will need to reseek to get new data + bool ExtractID() { + if (!spatial_iterator_->Valid()) { + // caller needs to reseek + return false; + } + if (spatial_iterator_->key().size() != 2 * sizeof(uint64_t)) { + status_ = Status::Corruption("Invalid spatial index key"); + valid_ = false; + return false; + } + Slice quad_key(spatial_iterator_->key().data(), sizeof(uint64_t)); + if (quad_key != current_quad_key_) { + // caller needs to reseek + return false; + } + // if we come to here, we have found the quad key + current_id_ = Slice(spatial_iterator_->key().data() + sizeof(uint64_t), + sizeof(uint64_t)); + return true; + } + // doesn't return anything, but sets valid_ and status_ on corruption + void ExtractData() { + assert(valid_); + data_iterator_->Seek(current_id_); + + if (!data_iterator_->Valid() || data_iterator_->key() != current_id_) { + status_ = Status::Corruption("Inconsistency in data column family"); + valid_ = false; + return; + } + + Slice data = data_iterator_->value(); + current_feature_set_.Clear(); + if (!GetLengthPrefixedSlice(&data, ¤t_blob_) || + !current_feature_set_.Deserialize(data)) { + status_ = Status::Corruption("Data column family corruption"); + valid_ = false; + return; + } + } + + unique_ptr spatial_iterator_; + unique_ptr data_iterator_; + BoundingBox tile_bbox_; + uint32_t tile_bits_; + uint64_t current_x_; + uint64_t current_y_; + std::string current_quad_key_; + Slice current_id_; + bool valid_; + Status status_; + + FeatureSet current_feature_set_; + Slice current_blob_; + + // used for deduplicating results + std::set returned_ids_; +}; + +class ErrorCursor : public Cursor { + public: + explicit ErrorCursor(Status s) : s_(s) { assert(!s.ok()); } + virtual Status status() const override { return s_; } + virtual bool Valid() const override { return false; } + virtual void Next() override { assert(false); } + + virtual const Slice blob() override { + assert(false); + return Slice(); + } + virtual const FeatureSet& feature_set() override { + assert(false); + // compiler complains otherwise + return trash_; + } + + private: + Status s_; + FeatureSet trash_; +}; + +// Column families are used to store element's data and spatial indexes. We use +// [default] column family to store the element data. This is the format of +// [default] column family: +// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set +// (serialized) +// We have one additional column family for each spatial index. The name of the +// column family is [spatial$]. The format is: +// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" +class SpatialDBImpl : public SpatialDB { + public: + // * db -- base DB that needs to be forwarded to StackableDB + // * data_column_family -- column family used to store the data + // * spatial_indexes -- a list of spatial indexes together with column + // families that correspond to those spatial indexes + // * next_id -- next ID in auto-incrementing ID. This is usually + // `max_id_currenty_in_db + 1` + SpatialDBImpl(DB* db, ColumnFamilyHandle* data_column_family, + const std::vector< + std::pair> + spatial_indexes, + uint64_t next_id) + : SpatialDB(db), + data_column_family_(data_column_family), + next_id_(next_id) { + for (const auto& index : spatial_indexes) { + name_to_index_.insert( + {index.first.name, IndexColumnFamily(index.first, index.second)}); + } + } + + ~SpatialDBImpl() { + for (auto& iter : name_to_index_) { + delete iter.second.column_family; + } + delete data_column_family_; + } + + virtual Status Insert( + const WriteOptions& write_options, const BoundingBox& bbox, + const Slice& blob, const FeatureSet& feature_set, + const std::vector& spatial_indexes) override { + WriteBatch batch; + + if (spatial_indexes.size() == 0) { + return Status::InvalidArgument("Spatial indexes can't be empty"); + } + + uint64_t id = next_id_.fetch_add(1); + + for (const auto& si : spatial_indexes) { + auto itr = name_to_index_.find(si); + if (itr == name_to_index_.end()) { + return Status::InvalidArgument("Can't find index " + si); + } + const auto& spatial_index = itr->second.index; + if (!spatial_index.bbox.Intersects(bbox)) { + continue; + } + BoundingBox tile_bbox = GetTileBoundingBox(spatial_index, bbox); + + for (uint64_t x = tile_bbox.min_x; x <= tile_bbox.max_x; ++x) { + for (uint64_t y = tile_bbox.min_y; y <= tile_bbox.max_y; ++y) { + // see above for format + std::string key; + PutFixed64BigEndian( + &key, GetQuadKeyFromTile(x, y, spatial_index.tile_bits)); + PutFixed64BigEndian(&key, id); + batch.Put(itr->second.column_family, key, Slice()); + } + } + } + + // see above for format + std::string data_key; + PutFixed64BigEndian(&data_key, id); + std::string data_value; + PutLengthPrefixedSlice(&data_value, blob); + feature_set.Serialize(&data_value); + batch.Put(data_column_family_, data_key, data_value); + + return Write(write_options, &batch); + } + + virtual Status Compact() override { + Status s, t; + for (auto& iter : name_to_index_) { + t = CompactRange(iter.second.column_family, nullptr, nullptr); + if (!t.ok()) { + s = t; + } + } + t = CompactRange(data_column_family_, nullptr, nullptr); + if (!t.ok()) { + s = t; + } + return s; + } + + virtual Cursor* Query(const ReadOptions& read_options, + const BoundingBox& bbox, + const std::string& spatial_index) override { + auto itr = name_to_index_.find(spatial_index); + if (itr == name_to_index_.end()) { + return new ErrorCursor(Status::InvalidArgument( + "Spatial index " + spatial_index + " not found")); + } + + std::vector iterators; + Status s = NewIterators(read_options, + {data_column_family_, itr->second.column_family}, + &iterators); + if (!s.ok()) { + return new ErrorCursor(s); + } + + const auto& si = itr->second.index; + return new SpatialIndexCursor(iterators[1], iterators[0], + GetTileBoundingBox(si, bbox), si.tile_bits); + } + + private: + ColumnFamilyHandle* data_column_family_; + struct IndexColumnFamily { + SpatialIndexOptions index; + ColumnFamilyHandle* column_family; + IndexColumnFamily(const SpatialIndexOptions& _index, + ColumnFamilyHandle* _cf) + : index(_index), column_family(_cf) {} + }; + // constant after construction! + std::unordered_map name_to_index_; + + std::atomic next_id_; +}; + +namespace { +Options GetRocksDBOptionsFromOptions(const SpatialDBOptions& options) { + Options rocksdb_options; + rocksdb_options.OptimizeLevelStyleCompaction(); + rocksdb_options.IncreaseParallelism(options.num_threads); + rocksdb_options.block_cache = NewLRUCache(options.cache_size); + if (options.bulk_load) { + rocksdb_options.PrepareForBulkLoad(); + } + return rocksdb_options; +} +} // namespace + +Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes, + SpatialDB** db, bool read_only) { + Options rocksdb_options = GetRocksDBOptionsFromOptions(options); + rocksdb_options.create_if_missing = true; + rocksdb_options.create_missing_column_families = true; + + std::vector column_families; + column_families.push_back(ColumnFamilyDescriptor( + kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + + for (const auto& index : spatial_indexes) { + column_families.emplace_back("spatial$" + index.name, + ColumnFamilyOptions(rocksdb_options)); + } + std::vector handles; + DB* base_db; + Status s; + if (read_only) { + s = DB::OpenForReadOnly(DBOptions(rocksdb_options), name, column_families, + &handles, &base_db); + } else { + s = DB::Open(DBOptions(rocksdb_options), name, column_families, &handles, + &base_db); + } + if (!s.ok()) { + return s; + } + + std::vector> + index_cf; + assert(handles.size() == spatial_indexes.size() + 1); + for (size_t i = 0; i < spatial_indexes.size(); ++i) { + index_cf.emplace_back(spatial_indexes[i], handles[i + 1]); + } + uint64_t next_id; + { + // find next_id + Iterator* iter = base_db->NewIterator(ReadOptions(), handles[0]); + iter->SeekToLast(); + if (iter->Valid()) { + uint64_t last_id; + bool ok = GetFixed64BigEndian(iter->key(), &last_id); + if (!ok) { + return Status::Corruption("Invalid key in data column family"); + } + next_id = last_id + 1; + } else { + next_id = 1; + } + delete iter; + } + + *db = new SpatialDBImpl(base_db, handles[0], index_cf, next_id); + return Status::OK(); +} + +} // namespace spatial +} // namespace rocksdb +#endif // ROCKSDB_LITE diff --git a/utilities/spatialdb/spatial_db_test.cc b/utilities/spatialdb/spatial_db_test.cc new file mode 100644 index 000000000..b4d5c23cc --- /dev/null +++ b/utilities/spatialdb/spatial_db_test.cc @@ -0,0 +1,265 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include +#include +#include + +#include "rocksdb/utilities/spatial_db.h" +#include "util/testharness.h" +#include "util/testutil.h" +#include "util/random.h" + +namespace rocksdb { +namespace spatial { + +class SpatialDBTest { + public: + SpatialDBTest() { + dbname_ = test::TmpDir() + "/spatial_db_test"; + DestroyDB(dbname_, Options()); + } + + void AssertCursorResults(BoundingBox bbox, const std::string& index, + const std::vector& blobs) { + Cursor* c = db_->Query(ReadOptions(), bbox, index); + ASSERT_OK(c->status()); + std::multiset b; + for (auto x : blobs) { + b.insert(x); + } + + while (c->Valid()) { + auto itr = b.find(c->blob().ToString()); + ASSERT_TRUE(itr != b.end()); + b.erase(itr); + c->Next(); + } + ASSERT_EQ(b.size(), 0U); + ASSERT_OK(c->status()); + delete c; + } + + std::string dbname_; + SpatialDB* db_; +}; + +TEST(SpatialDBTest, FeatureSetSerializeTest) { + FeatureSet fs; + + fs.Set("a", std::string("b")); + fs.Set("x", static_cast(3)); + fs.Set("y", false); + fs.Set("n", Variant()); // null + fs.Set("m", 3.25); + + ASSERT_TRUE(fs.Find("w") == fs.end()); + ASSERT_TRUE(fs.Find("x") != fs.end()); + ASSERT_TRUE((*fs.Find("x")).second == Variant(static_cast(3))); + ASSERT_TRUE((*fs.Find("y")).second != Variant(true)); + std::set keys({"a", "x", "y", "n", "m"}); + for (const auto& x : fs) { + ASSERT_TRUE(keys.find(x.first) != keys.end()); + keys.erase(x.first); + } + ASSERT_EQ(keys.size(), 0U); + + std::string serialized; + fs.Serialize(&serialized); + + FeatureSet deserialized; + ASSERT_TRUE(deserialized.Deserialize(serialized)); + + ASSERT_TRUE(deserialized.Contains("a")); + ASSERT_EQ(deserialized.Get("a").type(), Variant::kString); + ASSERT_EQ(deserialized.Get("a").get_string(), "b"); + ASSERT_TRUE(deserialized.Contains("x")); + ASSERT_EQ(deserialized.Get("x").type(), Variant::kInt); + ASSERT_EQ(deserialized.Get("x").get_int(), static_cast(3)); + ASSERT_TRUE(deserialized.Contains("y")); + ASSERT_EQ(deserialized.Get("y").type(), Variant::kBool); + ASSERT_EQ(deserialized.Get("y").get_bool(), false); + ASSERT_TRUE(deserialized.Contains("n")); + ASSERT_EQ(deserialized.Get("n").type(), Variant::kNull); + ASSERT_TRUE(deserialized.Contains("m")); + ASSERT_EQ(deserialized.Get("m").type(), Variant::kDouble); + ASSERT_EQ(deserialized.Get("m").get_double(), 3.25); + + // corrupted serialization + serialized = serialized.substr(0, serialized.size() - 3); + deserialized.Clear(); + ASSERT_TRUE(!deserialized.Deserialize(serialized)); +} + +TEST(SpatialDBTest, TestNextID) { + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), + "one", FeatureSet(), {"simple"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(10, 10, 15, 15), + "two", FeatureSet(), {"simple"})); + delete db_; + + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(55, 55, 65, 65), + "three", FeatureSet(), {"simple"})); + + delete db_; + + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + AssertCursorResults(BoundingBox(0, 0, 100, 100), "simple", + {"one", "two", "three"}); + + delete db_; +} + +TEST(SpatialDBTest, FeatureSetTest) { + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + FeatureSet fs; + fs.Set("a", std::string("b")); + fs.Set("c", std::string("d")); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), + "one", fs, {"simple"})); + + Cursor* c = + db_->Query(ReadOptions(), BoundingBox(5, 5, 10, 10), "simple"); + + ASSERT_TRUE(c->Valid()); + ASSERT_EQ(c->blob().compare("one"), 0); + FeatureSet returned = c->feature_set(); + ASSERT_TRUE(returned.Contains("a")); + ASSERT_TRUE(!returned.Contains("b")); + ASSERT_TRUE(returned.Contains("c")); + ASSERT_EQ(returned.Get("a").type(), Variant::kString); + ASSERT_EQ(returned.Get("a").get_string(), "b"); + ASSERT_EQ(returned.Get("c").type(), Variant::kString); + ASSERT_EQ(returned.Get("c").get_string(), "d"); + + c->Next(); + ASSERT_TRUE(!c->Valid()); + + delete c; + delete db_; +} + +TEST(SpatialDBTest, SimpleTest) { + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), 3)}, + &db_)); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(33, 17, 63, 79), + "one", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(65, 65, 111, 111), + "two", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(1, 49, 127, 63), + "three", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(20, 100, 21, 101), + "four", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(81, 33, 127, 63), + "five", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(1, 65, 47, 95), + "six", FeatureSet(), {"index"})); + + AssertCursorResults(BoundingBox(33, 17, 47, 31), "index", {"one"}); + AssertCursorResults(BoundingBox(17, 33, 79, 63), "index", + {"one", "three"}); + AssertCursorResults(BoundingBox(17, 81, 63, 111), "index", + {"four", "six"}); + AssertCursorResults(BoundingBox(85, 86, 85, 86), "index", {"two"}); + AssertCursorResults(BoundingBox(33, 1, 127, 111), "index", + {"one", "two", "three", "five", "six"}); + // even though the bounding box doesn't intersect, we got "four" back because + // it's in the same tile + AssertCursorResults(BoundingBox(18, 98, 19, 99), "index", {"four"}); + AssertCursorResults(BoundingBox(130, 130, 131, 131), "index", {}); + AssertCursorResults(BoundingBox(81, 17, 127, 31), "index", {}); + AssertCursorResults(BoundingBox(90, 50, 91, 51), "index", + {"three", "five"}); + + delete db_; +} + +namespace { +std::string RandomStr(Random* rnd) { + std::string r; + for (int k = 0; k < 10; ++k) { + r.push_back(rnd->Uniform(26) + 'a'); + } + return r; +} + +BoundingBox RandomBoundingBox(int limit, Random* rnd, int max_size) { + BoundingBox r; + r.min_x = rnd->Uniform(limit - 1); + r.min_y = rnd->Uniform(limit - 1); + r.max_x = r.min_x + rnd->Uniform(std::min(limit - 1 - r.min_x, max_size)) + 1; + r.max_y = r.min_y + rnd->Uniform(std::min(limit - 1 - r.min_y, max_size)) + 1; + return r; +} + +BoundingBox ScaleBB(BoundingBox b, double step) { + return BoundingBox(b.min_x * step + 1, b.min_y * step + 1, + (b.max_x + 1) * step - 1, + (b.max_y + 1) * step - 1); +} + +} // namespace + +TEST(SpatialDBTest, RandomizedTest) { + Random rnd(301); + std::vector>> elements; + + BoundingBox spatial_index_bounds(0, 0, (1LL << 32), (1LL << 32)); + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("index", spatial_index_bounds, 7)}, &db_)); + double step = (1LL << 32) / (1 << 7); + + for (int i = 0; i < 1000; ++i) { + std::string blob = RandomStr(&rnd); + BoundingBox bbox = RandomBoundingBox(128, &rnd, 10); + ASSERT_OK(db_->Insert(WriteOptions(), ScaleBB(bbox, step), blob, + FeatureSet(), {"index"})); + elements.push_back(make_pair(blob, bbox)); + } + + db_->Compact(); + + for (int i = 0; i < 1000; ++i) { + BoundingBox int_bbox = RandomBoundingBox(128, &rnd, 10); + BoundingBox double_bbox = ScaleBB(int_bbox, step); + std::vector blobs; + for (auto e : elements) { + if (e.second.Intersects(int_bbox)) { + blobs.push_back(e.first); + } + } + AssertCursorResults(double_bbox, "index", blobs); + } + + delete db_; +} + +} // namespace spatial +} // namespace rocksdb + +int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); }