diff --git a/Makefile b/Makefile index 1e4638593..07eb4d283 100644 --- a/Makefile +++ b/Makefile @@ -83,7 +83,7 @@ TESTS = \ redis_test \ reduce_levels_test \ plain_table_db_test \ - prefix_test \ + prefix_test \ simple_table_db_test \ skiplist_test \ stringappend_test \ @@ -99,6 +99,7 @@ TESTS = \ TOOLS = \ sst_dump \ + db_sanity_test \ db_stress \ ldb \ db_repl_stress \ @@ -405,7 +406,7 @@ ldb: tools/ldb.o $(LIBOBJECTS) # --------------------------------------------------------------------------- # Jni stuff # --------------------------------------------------------------------------- -JNI_NATIVE_SOURCES = ./java/rocksjni/rocksjni.cc +JNI_NATIVE_SOURCES = ./java/rocksjni/rocksjni.cc ./java/rocksjni/options.cc JAVA_INCLUDE = -I/usr/lib/jvm/java-openjdk/include/ -I/usr/lib/jvm/java-openjdk/include/linux ROCKSDBJNILIB = ./java/librocksdbjni.so diff --git a/db/c.cc b/db/c.cc index 9084c4a9a..2e55c0ea1 100644 --- a/db/c.cc +++ b/db/c.cc @@ -983,6 +983,26 @@ void rocksdb_options_set_memtable_prefix_bloom_probes( opt->rep.memtable_prefix_bloom_probes = v; } +void rocksdb_options_set_hash_skip_list_rep( + rocksdb_options_t *opt, size_t bucket_count, + int32_t skiplist_height, int32_t skiplist_branching_factor) { + static rocksdb::MemTableRepFactory* factory = 0; + if (!factory) { + factory = rocksdb::NewHashSkipListRepFactory( + bucket_count, skiplist_height, skiplist_branching_factor); + } + opt->rep.memtable_factory.reset(factory); +} + +void rocksdb_options_set_hash_link_list_rep( + rocksdb_options_t *opt, size_t bucket_count) { + static rocksdb::MemTableRepFactory* factory = 0; + if (!factory) { + factory = rocksdb::NewHashLinkListRepFactory(bucket_count); + } + opt->rep.memtable_factory.reset(factory); +} + void rocksdb_options_set_max_successive_merges( rocksdb_options_t* opt, size_t v) { opt->rep.max_successive_merges = v; diff --git a/db/c_test.c b/db/c_test.c index f17e37128..d8fa8eddb 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -433,6 +433,52 @@ int main(int argc, char** argv) { } + StartPhase("prefix"); + { + // Create new database + rocksdb_close(db); + rocksdb_destroy_db(options, dbname, &err); + + rocksdb_options_set_filter_policy(options, rocksdb_filterpolicy_create_bloom(10)); + rocksdb_options_set_prefix_extractor(options, rocksdb_slicetransform_create_fixed_prefix(3)); + rocksdb_options_set_hash_skip_list_rep(options, 50000, 4, 4); + + db = rocksdb_open(options, dbname, &err); + CheckNoError(err); + + rocksdb_put(db, woptions, "foo1", 4, "foo", 3, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "foo2", 4, "foo", 3, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "foo3", 4, "foo", 3, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "bar1", 4, "bar", 3, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "bar2", 4, "bar", 3, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "bar3", 4, "bar", 3, &err); + CheckNoError(err); + + rocksdb_readoptions_set_prefix_seek(roptions, 1); + + rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); + CheckCondition(!rocksdb_iter_valid(iter)); + + rocksdb_iter_seek(iter, "bar", 3); + rocksdb_iter_get_error(iter, &err); + CheckNoError(err); + CheckCondition(rocksdb_iter_valid(iter)); + + CheckIter(iter, "bar1", "bar"); + rocksdb_iter_next(iter); + CheckIter(iter, "bar2", "bar"); + rocksdb_iter_next(iter); + CheckIter(iter, "bar3", "bar"); + rocksdb_iter_get_error(iter, &err); + CheckNoError(err); + rocksdb_iter_destroy(iter); + } + StartPhase("cleanup"); rocksdb_close(db); rocksdb_options_destroy(options); diff --git a/db/db_iter.cc b/db/db_iter.cc index bbf8a8115..5329e5297 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -39,6 +39,71 @@ static void DumpInternalIter(Iterator* iter) { namespace { +class IterLookupKey { + public: + IterLookupKey() : key_(space_), buf_size_(sizeof(space_)), key_size_(0) {} + + ~IterLookupKey() { Clear(); } + + Slice GetKey() const { + if (key_ != nullptr) { + return Slice(key_, key_size_); + } else { + return Slice(); + } + } + + bool Valid() const { return key_ != nullptr; } + + void Clear() { + if (key_ != nullptr && key_ != space_) { + delete[] key_; + } + key_ = space_; + buf_size_ = sizeof(buf_size_); + } + + // Enlarge the buffer size if needed based on key_size. + // By default, static allocated buffer is used. Once there is a key + // larger than the static allocated buffer, another buffer is dynamically + // allocated, until a larger key buffer is requested. In that case, we + // reallocate buffer and delete the old one. + void EnlargeBufferIfNeeded(size_t key_size) { + // If size is smaller than buffer size, continue using current buffer, + // or the static allocated one, as default + if (key_size > buf_size_) { + // Need to enlarge the buffer. + Clear(); + key_ = new char[key_size]; + buf_size_ = key_size; + } + key_size_ = key_size; + } + + void SetUserKey(const Slice& user_key) { + size_t size = user_key.size(); + EnlargeBufferIfNeeded(size); + memcpy(key_, user_key.data(), size); + } + + void SetInternalKey(const Slice& user_key, SequenceNumber s) { + size_t usize = user_key.size(); + EnlargeBufferIfNeeded(usize + sizeof(uint64_t)); + memcpy(key_, user_key.data(), usize); + EncodeFixed64(key_ + usize, PackSequenceAndType(s, kValueTypeForSeek)); + } + + private: + char* key_; + size_t buf_size_; + size_t key_size_; + char space_[32]; // Avoid allocation for short keys + + // No copying allowed + IterLookupKey(const IterLookupKey&) = delete; + void operator=(const LookupKey&) = delete; +}; + // Memtables and sstables that make the DB representation contain // (userkey,seq,type) => uservalue entries. DBIter // combines multiple entries for the same userkey found in the DB @@ -80,7 +145,7 @@ class DBIter: public Iterator { virtual bool Valid() const { return valid_; } virtual Slice key() const { assert(valid_); - return saved_key_; + return saved_key_.GetKey(); } virtual Slice value() const { assert(valid_); @@ -108,10 +173,6 @@ class DBIter: public Iterator { bool ParseKey(ParsedInternalKey* key); void MergeValuesNewToOld(); - inline void SaveKey(const Slice& k, std::string* dst) { - dst->assign(k.data(), k.size()); - } - inline void ClearSavedValue() { if (saved_value_.capacity() > 1048576) { std::string empty; @@ -130,7 +191,7 @@ class DBIter: public Iterator { SequenceNumber const sequence_; Status status_; - std::string saved_key_; // == current key when direction_==kReverse + IterLookupKey saved_key_; // == current key when direction_==kReverse std::string saved_value_; // == current raw value when direction_==kReverse std::string skip_key_; Direction direction_; @@ -170,7 +231,7 @@ void DBIter::Next() { } if (!iter_->Valid()) { valid_ = false; - saved_key_.clear(); + saved_key_.Clear(); return; } } @@ -210,7 +271,7 @@ void DBIter::FindNextUserEntryInternal(bool skipping) { ParsedInternalKey ikey; if (ParseKey(&ikey) && ikey.sequence <= sequence_) { if (skipping && - user_comparator_->Compare(ikey.user_key, saved_key_) <= 0) { + user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) <= 0) { num_skipped++; // skip this entry BumpPerfCount(&perf_context.internal_key_skipped_count); } else { @@ -219,18 +280,18 @@ void DBIter::FindNextUserEntryInternal(bool skipping) { case kTypeDeletion: // Arrange to skip all upcoming entries for this key since // they are hidden by this deletion. - SaveKey(ikey.user_key, &saved_key_); + saved_key_.SetUserKey(ikey.user_key); skipping = true; num_skipped = 0; BumpPerfCount(&perf_context.internal_delete_skipped_count); break; case kTypeValue: valid_ = true; - SaveKey(ikey.user_key, &saved_key_); + saved_key_.SetUserKey(ikey.user_key); return; case kTypeMerge: // By now, we are sure the current ikey is going to yield a value - SaveKey(ikey.user_key, &saved_key_); + saved_key_.SetUserKey(ikey.user_key); current_entry_is_merged_ = true; valid_ = true; MergeValuesNewToOld(); // Go to a different state machine @@ -248,8 +309,8 @@ void DBIter::FindNextUserEntryInternal(bool skipping) { if (skipping && num_skipped > max_skip_) { num_skipped = 0; std::string last_key; - AppendInternalKey(&last_key, - ParsedInternalKey(Slice(saved_key_), 0, kValueTypeForSeek)); + AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetKey(), 0, + kValueTypeForSeek)); iter_->Seek(last_key); RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); } else { @@ -284,7 +345,7 @@ void DBIter::MergeValuesNewToOld() { continue; } - if (user_comparator_->Compare(ikey.user_key, saved_key_) != 0) { + if (user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) != 0) { // hit the next user key, stop right here break; } @@ -320,7 +381,7 @@ void DBIter::MergeValuesNewToOld() { // a deletion marker. // feed null as the existing value to the merge operator, such that // client can differentiate this scenario and do things accordingly. - user_merge_operator_->FullMerge(saved_key_, nullptr, operands, + user_merge_operator_->FullMerge(saved_key_.GetKey(), nullptr, operands, &saved_value_, logger_); } @@ -339,17 +400,17 @@ void DBIter::Prev() { // iter_ is pointing at the current entry. Scan backwards until // the key changes so we can use the normal reverse scanning code. assert(iter_->Valid()); // Otherwise valid_ would have been false - SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + saved_key_.SetUserKey(ExtractUserKey(iter_->key())); while (true) { iter_->Prev(); if (!iter_->Valid()) { valid_ = false; - saved_key_.clear(); + saved_key_.Clear(); ClearSavedValue(); return; } if (user_comparator_->Compare(ExtractUserKey(iter_->key()), - saved_key_) < 0) { + saved_key_.GetKey()) < 0) { break; } } @@ -370,13 +431,13 @@ void DBIter::FindPrevUserEntry() { ParsedInternalKey ikey; if (ParseKey(&ikey) && ikey.sequence <= sequence_) { if ((value_type != kTypeDeletion) && - user_comparator_->Compare(ikey.user_key, saved_key_) < 0) { + user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) < 0) { // We encountered a non-deleted value in entries for previous keys, break; } value_type = ikey.type; if (value_type == kTypeDeletion) { - saved_key_.clear(); + saved_key_.Clear(); ClearSavedValue(); saved_key_valid = false; } else { @@ -385,7 +446,7 @@ void DBIter::FindPrevUserEntry() { std::string empty; swap(empty, saved_value_); } - SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + saved_key_.SetUserKey(ExtractUserKey(iter_->key())); saved_value_.assign(raw_value.data(), raw_value.size()); } } else { @@ -401,9 +462,9 @@ void DBIter::FindPrevUserEntry() { if (saved_key_valid && num_skipped > max_skip_) { num_skipped = 0; std::string last_key; - AppendInternalKey(&last_key, - ParsedInternalKey(Slice(saved_key_), kMaxSequenceNumber, - kValueTypeForSeek)); + AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetKey(), + kMaxSequenceNumber, + kValueTypeForSeek)); iter_->Seek(last_key); RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); } else { @@ -415,7 +476,7 @@ void DBIter::FindPrevUserEntry() { if (value_type == kTypeDeletion) { // End valid_ = false; - saved_key_.clear(); + saved_key_.Clear(); ClearSavedValue(); direction_ = kForward; } else { @@ -424,12 +485,12 @@ void DBIter::FindPrevUserEntry() { } void DBIter::Seek(const Slice& target) { - saved_key_.clear(); - AppendInternalKey( - &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek)); + saved_key_.Clear(); + // now savved_key is used to store internal key. + saved_key_.SetInternalKey(target, sequence_); StopWatchNano internal_seek_timer(env_, false); StartPerfTimer(&internal_seek_timer); - iter_->Seek(saved_key_); + iter_->Seek(saved_key_.GetKey()); BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer); if (iter_->Valid()) { direction_ = kForward; diff --git a/db/db_test.cc b/db/db_test.cc index e0112348c..9f6230a82 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1505,6 +1505,44 @@ TEST(DBTest, IterSeekBeforePrev) { delete iter; } +std::string MakeLongKey(size_t length, char c) { + return std::string(length, c); +} + +TEST(DBTest, IterLongKeys) { + ASSERT_OK(Put(MakeLongKey(20, 0), "0")); + ASSERT_OK(Put(MakeLongKey(32, 2), "2")); + ASSERT_OK(Put("a", "b")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put(MakeLongKey(50, 1), "1")); + ASSERT_OK(Put(MakeLongKey(127, 3), "3")); + ASSERT_OK(Put(MakeLongKey(64, 4), "4")); + auto iter = db_->NewIterator(ReadOptions()); + + // Create a key that needs to be skipped for Seq too new + iter->Seek(MakeLongKey(20, 0)); + ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4"); + delete iter; + + iter = db_->NewIterator(ReadOptions()); + iter->Seek(MakeLongKey(50, 1)); + ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); + delete iter; +} + + TEST(DBTest, IterNextWithNewerSeq) { ASSERT_OK(Put("0", "0")); dbfull()->Flush(FlushOptions()); diff --git a/db/dbformat.cc b/db/dbformat.cc index 43560bc83..2d35d0423 100644 --- a/db/dbformat.cc +++ b/db/dbformat.cc @@ -15,7 +15,7 @@ namespace rocksdb { -static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) { +uint64_t PackSequenceAndType(uint64_t seq, ValueType t) { assert(seq <= kMaxSequenceNumber); assert(t <= kValueTypeForSeek); return (seq << 8) | t; diff --git a/db/dbformat.h b/db/dbformat.h index be46d14a1..f3d714aa2 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -67,6 +67,8 @@ inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { return key.user_key.size() + 8; } +extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t); + // Append the serialization of "key" to *result. extern void AppendInternalKey(std::string* result, const ParsedInternalKey& key); diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 6a3d81aa5..4f1563b94 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -247,7 +247,7 @@ class TestPlainTableFactory : public PlainTableFactory { }; TEST(PlainTableDBTest, Flush) { - for (int bloom_bits = 0; bloom_bits <= 8; bloom_bits += 8) { + for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { for (int total_order = 0; total_order <= 1; total_order++) { Options options = CurrentOptions(); options.create_if_missing = true; @@ -272,7 +272,7 @@ TEST(PlainTableDBTest, Flush) { } TEST(PlainTableDBTest, Flush2) { - for (int bloom_bits = 0; bloom_bits <= 10; bloom_bits += 10) { + for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { for (int total_order = 0; total_order <= 1; total_order++) { bool expect_bloom_not_match = false; Options options = CurrentOptions(); @@ -327,7 +327,7 @@ TEST(PlainTableDBTest, Flush2) { } TEST(PlainTableDBTest, Iterator) { - for (int bloom_bits = 0; bloom_bits <= 8; bloom_bits += 8) { + for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { for (int total_order = 0; total_order <= 1; total_order++) { bool expect_bloom_not_match = false; Options options = CurrentOptions(); @@ -410,17 +410,18 @@ TEST(PlainTableDBTest, Iterator) { // Test Bloom Filter if (bloom_bits > 0) { - // Neither key nor value should exist. - expect_bloom_not_match = true; - iter->Seek("2not000000000bar"); - ASSERT_TRUE(!iter->Valid()); - - // Key doesn't exist any more but prefix exists. - if (total_order) { + if (!total_order) { + // Neither key nor value should exist. + expect_bloom_not_match = true; iter->Seek("2not000000000bar"); ASSERT_TRUE(!iter->Valid()); + ASSERT_EQ("NOT_FOUND", Get("2not000000000bar")); + expect_bloom_not_match = false; + } else { + expect_bloom_not_match = true; + ASSERT_EQ("NOT_FOUND", Get("2not000000000bar")); + expect_bloom_not_match = false; } - expect_bloom_not_match = false; } delete iter; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 23c63f24f..a6bc90085 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -346,6 +346,8 @@ extern void rocksdb_options_set_delete_obsolete_files_period_micros( extern void rocksdb_options_set_source_compaction_factor(rocksdb_options_t*, int); extern void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t*); extern void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t*); +extern void rocksdb_options_set_hash_skip_list_rep(rocksdb_options_t*, size_t, int32_t, int32_t); +extern void rocksdb_options_set_hash_link_list_rep(rocksdb_options_t*, size_t); extern void rocksdb_options_set_max_bytes_for_level_base(rocksdb_options_t* opt, uint64_t n); extern void rocksdb_options_set_stats_dump_period_sec(rocksdb_options_t* opt, unsigned int sec); diff --git a/java/Makefile b/java/Makefile index 794ec1439..8168d3418 100644 --- a/java/Makefile +++ b/java/Makefile @@ -1,4 +1,4 @@ -NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB +NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options NATIVE_INCLUDE = ./include ROCKSDB_JAR = rocksdbjni.jar @@ -14,4 +14,8 @@ java: sample: javac -cp $(ROCKSDB_JAR) RocksDBSample.java - java -ea -Djava.library.path=.:../ -cp ".:./*" RocksDBSample /tmp/rocksdbjni/ + @rm -rf /tmp/rocksdbjni + @rm -rf /tmp/rocksdbjni_not_found + java -ea -Djava.library.path=.:../ -cp ".:./*" RocksDBSample /tmp/rocksdbjni + @rm -rf /tmp/rocksdbjni + @rm -rf /tmp/rocksdbjni_not_found diff --git a/java/RocksDBSample.java b/java/RocksDBSample.java index b574c23e5..4e06bb29d 100644 --- a/java/RocksDBSample.java +++ b/java/RocksDBSample.java @@ -19,11 +19,37 @@ public class RocksDBSample { return; } String db_path = args[0]; + String db_path_not_found = db_path + "_not_found"; System.out.println("RocksDBSample"); + RocksDB db = null; + Options options = new Options(); + try { + db = RocksDB.open(options, db_path_not_found); + assert(false); + } catch (RocksDBException e) { + System.out.format("caught the expceted exception -- %s\n", e); + assert(db == null); + } + + options.setCreateIfMissing(true); + try { + db = RocksDB.open(options, db_path_not_found); + db.put("hello".getBytes(), "world".getBytes()); + byte[] value = db.get("hello".getBytes()); + assert("world".equals(new String(value))); + } catch (RocksDBException e) { + System.out.format("[ERROR] caught the unexpceted exception -- %s\n", e); + assert(db == null); + assert(false); + } + + // be sure to release the c++ pointer + options.dispose(); + db.close(); try { - RocksDB db = RocksDB.open(db_path); + db = RocksDB.open(db_path); db.put("hello".getBytes(), "world".getBytes()); byte[] value = db.get("hello".getBytes()); System.out.format("Get('hello') = %s\n", @@ -67,13 +93,11 @@ public class RocksDBSample { assert(len == RocksDB.NOT_FOUND); len = db.get(testKey, enoughArray); assert(len == testValue.length); - try { - db.close(); - } catch (IOException e) { - System.err.println(e); - } } catch (RocksDBException e) { System.err.println(e); } + if (db != null) { + db.close(); + } } } diff --git a/java/org/rocksdb/Options.java b/java/org/rocksdb/Options.java new file mode 100644 index 000000000..af759ae8d --- /dev/null +++ b/java/org/rocksdb/Options.java @@ -0,0 +1,69 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Options to control the behavior of a database. It will be used + * during the creation of a RocksDB (i.e., RocksDB::Open()). + * + * Note that dispose() must be called before an Options instance + * become out-of-scope to release the allocated memory in c++. + */ +public class Options { + /** + * Construct options for opening a RocksDB. + * + * This constructor will create (by allocating a block of memory) + * an rocksdb::Options in the c++ side. + */ + public Options() { + nativeHandle_ = 0; + newOptions(); + } + + /** + * If this value is set to true, then the database will be created + * if it is missing during RocksDB::Open(). + * Default: false + * + * @param flag a flag indicating whether to create a database the + * specified database in RocksDB::Open() operation is missing. + * @see RocksDB::Open() + */ + public void setCreateIfMissing(boolean flag) { + assert(nativeHandle_ != 0); + setCreateIfMissing(nativeHandle_, flag); + } + + /** + * Return true if the create_if_missing flag is set to true. + * If true, the database will be created if it is missing. + * + * @return return true if the create_if_missing flag is set to true. + * @see setCreateIfMissing() + */ + public boolean craeteIfMissing() { + assert(nativeHandle_ != 0); + return createIfMissing(nativeHandle_); + } + + /** + * Release the memory allocated for the current instance + * in the c++ side. + */ + public synchronized void dispose() { + if (nativeHandle_ != 0) { + dispose0(); + } + } + + private native void newOptions(); + private native void dispose0(); + private native void setCreateIfMissing(long handle, boolean flag); + private native boolean createIfMissing(long handle); + + long nativeHandle_; +} diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index 7e96eff28..b08694081 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -16,16 +16,20 @@ import java.io.IOException; * All methods of this class could potentially throw RocksDBException, which * indicates sth wrong at the rocksdb library side and the call failed. */ -public class RocksDB implements Closeable { +public class RocksDB { public static final int NOT_FOUND = -1; /** * The factory constructor of RocksDB that opens a RocksDB instance given - * the path to the database. + * the path to the database using the default options w/ createIfMissing + * set to true. * * @param path the path to the rocksdb. * @param status an out value indicating the status of the Open(). * @return a rocksdb instance on success, null if the specified rocksdb can * not be opened. + * + * @see Options.setCreateIfMissing() + * @see Options.createIfMissing() */ public static RocksDB open(String path) throws RocksDBException { RocksDB db = new RocksDB(); @@ -33,8 +37,19 @@ public class RocksDB implements Closeable { return db; } - @Override public void close() throws IOException { - if (nativeHandle != 0) { + /** + * The factory constructor of RocksDB that opens a RocksDB instance given + * the path to the database using the specified options and db path. + */ + public static RocksDB open(Options options, String path) + throws RocksDBException { + RocksDB db = new RocksDB(); + db.open(options.nativeHandle_, path); + return db; + } + + public synchronized void close() { + if (nativeHandle_ != 0) { close0(); } } @@ -80,15 +95,20 @@ public class RocksDB implements Closeable { return get(key, key.length); } + @Override protected void finalize() { + close(); + } + /** * Private constructor. */ private RocksDB() { - nativeHandle = -1; + nativeHandle_ = 0; } // native methods private native void open0(String path) throws RocksDBException; + private native void open(long optionsHandle, String path) throws RocksDBException; private native void put( byte[] key, int keyLen, byte[] value, int valueLen) throws RocksDBException; @@ -99,5 +119,5 @@ public class RocksDB implements Closeable { byte[] key, int keyLen) throws RocksDBException; private native void close0(); - private long nativeHandle; + private long nativeHandle_; } diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc new file mode 100644 index 000000000..ef308bc4d --- /dev/null +++ b/java/rocksjni/options.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// This file implements the "bridge" between Java and C++ for rocksdb::Options. + +#include +#include +#include +#include + +#include "include/org_rocksdb_Options.h" +#include "rocksjni/portal.h" +#include "rocksdb/db.h" + +/* + * Class: org_rocksdb_Options + * Method: newOptions + * Signature: ()V + */ +void Java_org_rocksdb_Options_newOptions(JNIEnv* env, jobject jobj) { + rocksdb::Options* op = new rocksdb::Options(); + rocksdb::OptionsJni::setHandle(env, jobj, op); +} + +/* + * Class: org_rocksdb_Options + * Method: dispose0 + * Signature: ()V + */ +void Java_org_rocksdb_Options_dispose0(JNIEnv* env, jobject jobj) { + rocksdb::Options* op = rocksdb::OptionsJni::getHandle(env, jobj); + delete op; + + rocksdb::OptionsJni::setHandle(env, jobj, op); +} + +/* + * Class: org_rocksdb_Options + * Method: setCreateIfMissing + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setCreateIfMissing( + JNIEnv* env, jobject jobj, jlong jhandle, jboolean flag) { + reinterpret_cast(jhandle)->create_if_missing = flag; +} + +/* + * Class: org_rocksdb_Options + * Method: createIfMissing + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_createIfMissing( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->create_if_missing; +} diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index d51ea2059..a90b82514 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -29,7 +29,7 @@ class RocksDBJni { // that stores the pointer to rocksdb::DB. static jfieldID getHandleFieldID(JNIEnv* env) { static jfieldID fid = env->GetFieldID( - getJClass(env), "nativeHandle", "J"); + getJClass(env), "nativeHandle_", "J"); assert(fid != nullptr); return fid; } @@ -77,5 +77,37 @@ class RocksDBExceptionJni { } }; +class OptionsJni { + public: + // Get the java class id of org.rocksdb.Options. + static jclass getJClass(JNIEnv* env) { + static jclass jclazz = env->FindClass("org/rocksdb/Options"); + assert(jclazz != nullptr); + return jclazz; + } + + // Get the field id of the member variable of org.rocksdb.Options + // that stores the pointer to rocksdb::Options + static jfieldID getHandleFieldID(JNIEnv* env) { + static jfieldID fid = env->GetFieldID( + getJClass(env), "nativeHandle_", "J"); + assert(fid != nullptr); + return fid; + } + + // Get the pointer to rocksdb::Options + static rocksdb::Options* getHandle(JNIEnv* env, jobject jobj) { + return reinterpret_cast( + env->GetLongField(jobj, getHandleFieldID(env))); + } + + // Pass the rocksdb::Options pointer to the java side. + static void setHandle(JNIEnv* env, jobject jobj, rocksdb::Options* op) { + env->SetLongField( + jobj, getHandleFieldID(env), + reinterpret_cast(op)); + } +}; + } // namespace rocksdb #endif // JAVA_ROCKSJNI_PORTAL_H_ diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 3ae53834e..b5d42c0c7 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -15,20 +15,12 @@ #include "rocksjni/portal.h" #include "rocksdb/db.h" -/* - * Class: org_rocksdb_RocksDB - * Method: open0 - * Signature: (Ljava/lang/String;)V - */ -void Java_org_rocksdb_RocksDB_open0( - JNIEnv* env, jobject java_db, jstring jdb_path) { +void rocksdb_open_helper( + JNIEnv* env, jobject java_db, jstring jdb_path, const rocksdb::Options& opt) { rocksdb::DB* db; - rocksdb::Options options; - options.create_if_missing = true; - jboolean isCopy = false; - const char* db_path = env->GetStringUTFChars(jdb_path, &isCopy); - rocksdb::Status s = rocksdb::DB::Open(options, db_path, &db); + const char* db_path = env->GetStringUTFChars(jdb_path, 0); + rocksdb::Status s = rocksdb::DB::Open(opt, db_path, &db); env->ReleaseStringUTFChars(jdb_path, db_path); if (s.ok()) { @@ -38,6 +30,30 @@ void Java_org_rocksdb_RocksDB_open0( rocksdb::RocksDBExceptionJni::ThrowNew(env, s); } +/* + * Class: org_rocksdb_RocksDB + * Method: open0 + * Signature: (Ljava/lang/String;)V + */ +void Java_org_rocksdb_RocksDB_open0( + JNIEnv* env, jobject jdb, jstring jdb_path) { + rocksdb::Options options; + options.create_if_missing = true; + + rocksdb_open_helper(env, jdb, jdb_path, options); +} + +/* + * Class: org_rocksdb_RocksDB + * Method: open + * Signature: (JLjava/lang/String;)V + */ +void Java_org_rocksdb_RocksDB_open( + JNIEnv* env, jobject jdb, jlong jopt_handle, jstring jdb_path) { + auto options = reinterpret_cast(jopt_handle); + rocksdb_open_helper(env, jdb, jdb_path, *options); +} + /* * Class: org_rocksdb_RocksDB * Method: put @@ -147,6 +163,7 @@ jint Java_org_rocksdb_RocksDB_get___3BI_3BI( env->ReleaseByteArrayElements(jvalue, value, JNI_ABORT); return kNotFound; } else if (!s.ok()) { + env->ReleaseByteArrayElements(jvalue, value, JNI_ABORT); // Here since we are throwing a Java exception from c++ side. // As a result, c++ does not know calling this function will in fact // throwing an exception. As a result, the execution flow will @@ -164,10 +181,7 @@ jint Java_org_rocksdb_RocksDB_get___3BI_3BI( memcpy(value, cvalue.c_str(), length); env->ReleaseByteArrayElements(jvalue, value, JNI_COMMIT); - if (cvalue_len > length) { - return static_cast(cvalue_len); - } - return length; + return static_cast(cvalue_len); } /* diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index d521446f8..a6f8bed0e 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -267,14 +267,14 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) { index_.reset(); - if (options_.prefix_extractor != nullptr) { + if (options_.prefix_extractor.get() != nullptr) { uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey; if (bloom_total_bits > 0) { bloom_.reset(new DynamicBloom(bloom_total_bits, options_.bloom_locality)); } } - if (options_.prefix_extractor == nullptr || kHashTableRatio <= 0) { + if (options_.prefix_extractor.get() == nullptr || kHashTableRatio <= 0) { // Fall back to pure binary search if the user fails to specify a prefix // extractor. index_size_ = 1; @@ -366,7 +366,7 @@ void PlainTableReader::FillIndexes( Status PlainTableReader::PopulateIndex() { // options.prefix_extractor is requried for a hash-based look-up. - if (options_.prefix_extractor == nullptr && kHashTableRatio != 0) { + if (options_.prefix_extractor.get() == nullptr && kHashTableRatio != 0) { return Status::NotSupported( "PlainTable requires a prefix extractor enable prefix hash mode."); } @@ -488,7 +488,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, } bool PlainTableReader::MatchBloom(uint32_t hash) const { - return bloom_ == nullptr || bloom_->MayContainHash(hash); + return bloom_.get() == nullptr || bloom_->MayContainHash(hash); } Slice PlainTableReader::GetPrefix(const ParsedInternalKey& target) const { @@ -676,20 +676,14 @@ void PlainTableIterator::Seek(const Slice& target) { } Slice prefix_slice = table_->GetPrefix(target); - uint32_t prefix_hash; - uint32_t bloom_hash; - if (table_->IsTotalOrderMode()) { - // The total order mode, there is only one hash bucket 0. The bloom filter - // is checked against the whole user key. - prefix_hash = 0; - bloom_hash = GetSliceHash(table_->GetUserKey(target)); - } else { + uint32_t prefix_hash = 0; + // Bloom filter is ignored in total-order mode. + if (!table_->IsTotalOrderMode()) { prefix_hash = GetSliceHash(prefix_slice); - bloom_hash = prefix_hash; - } - if (!table_->MatchBloom(bloom_hash)) { - offset_ = next_offset_ = table_->data_end_offset_; - return; + if (!table_->MatchBloom(prefix_hash)) { + offset_ = next_offset_ = table_->data_end_offset_; + return; + } } bool prefix_match; status_ = table_->GetOffset(target, prefix_slice, prefix_hash, prefix_match, diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index a93b3dd35..ac2cb8744 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -247,7 +247,7 @@ class PlainTableReader: public TableReader { } bool IsTotalOrderMode() const { - return (options_.prefix_extractor == nullptr); + return (options_.prefix_extractor.get() == nullptr); } // No copying allowed diff --git a/tools/db_sanity_test.cc b/tools/db_sanity_test.cc index f05d6f7ce..9642e9e5f 100644 --- a/tools/db_sanity_test.cc +++ b/tools/db_sanity_test.cc @@ -134,10 +134,10 @@ class SanityTestPlainTableFactory : public SanityTest { explicit SanityTestPlainTableFactory(const std::string& path) : SanityTest(path) { options_.table_factory.reset(NewPlainTableFactory()); - options_.prefix_extractor = NewFixedPrefixTransform(2); + options_.prefix_extractor.reset(NewFixedPrefixTransform(2)); options_.allow_mmap_reads = true; } - ~SanityTestPlainTableFactory() { delete options_.prefix_extractor; } + ~SanityTestPlainTableFactory() {} virtual Options GetOptions() const { return options_; } virtual std::string Name() const { return "PlainTable"; } diff --git a/util/auto_roll_logger.cc b/util/auto_roll_logger.cc index 76d410b26..19c2b8ca3 100644 --- a/util/auto_roll_logger.cc +++ b/util/auto_roll_logger.cc @@ -47,7 +47,11 @@ void AutoRollLogger::Logv(const char* format, va_list ap) { if ((kLogFileTimeToRoll > 0 && LogExpired()) || (kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) { RollLogFile(); - ResetLogger(); + Status s = ResetLogger(); + if (!s.ok()) { + // can't really log the error if creating a new LOG file failed + return; + } } // pin down the current logger_ instance before releasing the mutex. diff --git a/util/env_posix.cc b/util/env_posix.cc index da65d7374..bce9526a6 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -174,7 +174,10 @@ class PosixSequentialFile: public SequentialFile { virtual Status Read(size_t n, Slice* result, char* scratch) { Status s; - size_t r = fread_unlocked(scratch, 1, n, file_); + size_t r = 0; + do { + r = fread_unlocked(scratch, 1, n, file_); + } while (r == 0 && ferror(file_) && errno == EINTR); *result = Slice(scratch, r); if (r < n) { if (feof(file_)) { @@ -231,7 +234,10 @@ class PosixRandomAccessFile: public RandomAccessFile { virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; - ssize_t r = pread(fd_, scratch, n, static_cast(offset)); + ssize_t r = -1; + do { + r = pread(fd_, scratch, n, static_cast(offset)); + } while (r < 0 && errno == EINTR); *result = Slice(scratch, (r < 0) ? 0 : r); if (r < 0) { // An error: return a non-ok status @@ -680,6 +686,9 @@ class PosixWritableFile : public WritableFile { while (left != 0) { ssize_t done = write(fd_, src, left); if (done < 0) { + if (errno == EINTR) { + continue; + } return IOError(filename_, errno); } TEST_KILL_RANDOM(rocksdb_kill_odds); @@ -727,6 +736,9 @@ class PosixWritableFile : public WritableFile { while (left != 0) { ssize_t done = write(fd_, src, left); if (done < 0) { + if (errno == EINTR) { + continue; + } return IOError(filename_, errno); } TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2); @@ -849,6 +861,9 @@ class PosixRandomRWFile : public RandomRWFile { while (left != 0) { ssize_t done = pwrite(fd_, src, left, offset); if (done < 0) { + if (errno == EINTR) { + continue; + } return IOError(filename_, errno); } @@ -1003,7 +1018,10 @@ class PosixEnv : public Env { unique_ptr* result, const EnvOptions& options) { result->reset(); - FILE* f = fopen(fname.c_str(), "r"); + FILE* f = nullptr; + do { + f = fopen(fname.c_str(), "r"); + } while (f == nullptr && errno == EINTR); if (f == nullptr) { *result = nullptr; return IOError(fname, errno); @@ -1051,7 +1069,10 @@ class PosixEnv : public Env { const EnvOptions& options) { result->reset(); Status s; - const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); + int fd = -1; + do { + fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); + } while (fd < 0 && errno == EINTR); if (fd < 0) { s = IOError(fname, errno); } else { diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index b1a865d5e..d273947a8 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -152,12 +152,12 @@ TEST(ThreadLocalTest, ConcurrentReadWriteTest) { ThreadLocalPtr tls2; port::Mutex mu1; port::CondVar cv1(&mu1); - Params p1(&mu1, &cv1, nullptr, 128); + Params p1(&mu1, &cv1, nullptr, 16); p1.tls2 = &tls2; port::Mutex mu2; port::CondVar cv2(&mu2); - Params p2(&mu2, &cv2, nullptr, 128); + Params p2(&mu2, &cv2, nullptr, 16); p2.doWrite = true; p2.tls2 = &tls2;