diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index bbac4aa64..b7ff97b34 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -12,7 +12,9 @@ #include "db/table_properties_collector.h" #include "rocksdb/table_properties.h" #include "rocksdb/table.h" +#include "rocksdb/plain_table_factory.h" #include "table/block_based_table_factory.h" +#include "table/meta_blocks.h" #include "util/coding.h" #include "util/testharness.h" #include "util/testutil.h" @@ -20,8 +22,6 @@ namespace rocksdb { class TablePropertiesTest { - private: - unique_ptr table_reader_; }; // TODO(kailiu) the following classes should be moved to some more general @@ -93,22 +93,6 @@ void MakeBuilder( options.compression)); } -void OpenTable( - const Options& options, - const std::string& contents, - std::unique_ptr* table_reader) { - - std::unique_ptr file(new FakeRandomeAccessFile(contents)); - auto s = options.table_factory->GetTableReader( - options, - EnvOptions(), - std::move(file), - contents.size(), - table_reader - ); - ASSERT_OK(s); -} - // Collects keys that starts with "A" in a table. class RegularKeysStartWithA: public TablePropertiesCollector { public: @@ -141,23 +125,66 @@ class RegularKeysStartWithA: public TablePropertiesCollector { uint32_t count_ = 0; }; -TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) { - Options options; - +extern uint64_t kBlockBasedTableMagicNumber; +extern const uint64_t kPlainTableMagicNumber; +void TestCustomizedTablePropertiesCollector( + uint64_t magic_number, + bool encode_as_internal, + const Options& options) { // make sure the entries will be inserted with order. 
std::map kvs = { - {"About", "val5"}, // starts with 'A' - {"Abstract", "val2"}, // starts with 'A' - {"Around", "val7"}, // starts with 'A' - {"Beyond", "val3"}, - {"Builder", "val1"}, - {"Cancel", "val4"}, - {"Find", "val6"}, + {"About ", "val5"}, // starts with 'A' + {"Abstract", "val2"}, // starts with 'A' + {"Around ", "val7"}, // starts with 'A' + {"Beyond ", "val3"}, + {"Builder ", "val1"}, + {"Cancel ", "val4"}, + {"Find ", "val6"}, }; + // -- Step 1: build table + std::unique_ptr builder; + std::unique_ptr writable; + MakeBuilder(options, &writable, &builder); + + for (const auto& kv : kvs) { + if (encode_as_internal) { + InternalKey ikey(kv.first, 0, ValueType::kTypeValue); + builder->Add(ikey.Encode(), kv.second); + } else { + builder->Add(kv.first, kv.second); + } + } + ASSERT_OK(builder->Finish()); + + // -- Step 2: Read properties + FakeRandomeAccessFile readable(writable->contents()); + TableProperties props; + Status s = ReadTableProperties( + &readable, + writable->contents().size(), + magic_number, + Env::Default(), + nullptr, + &props + ); + ASSERT_OK(s); + + auto user_collected = props.user_collected_properties; + + ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest")); + + uint32_t starts_with_A = 0; + Slice key(user_collected.at("Count")); + ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); + ASSERT_EQ(3u, starts_with_A); +} + +TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) { // Test properties collectors with internal keys or regular keys + // for block based table for (bool encode_as_internal : { true, false }) { - // -- Step 1: build table + Options options; auto collector = new RegularKeysStartWithA(); if (encode_as_internal) { options.table_properties_collectors = { @@ -167,95 +194,112 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) { options.table_properties_collectors.resize(1); options.table_properties_collectors[0].reset(collector); } - std::unique_ptr builder; - std::unique_ptr writable; - 
MakeBuilder(options, &writable, &builder); + TestCustomizedTablePropertiesCollector( + kBlockBasedTableMagicNumber, + encode_as_internal, + options + ); + } - for (const auto& kv : kvs) { - if (encode_as_internal) { - InternalKey ikey(kv.first, 0, ValueType::kTypeValue); - builder->Add(ikey.Encode(), kv.second); - } else { - builder->Add(kv.first, kv.second); - } - } - ASSERT_OK(builder->Finish()); + // test plain table + Options options; + options.table_properties_collectors.push_back( + std::make_shared() + ); + options.table_factory = std::make_shared(8, 8, 0); + TestCustomizedTablePropertiesCollector( + kPlainTableMagicNumber, true, options + ); +} - // -- Step 2: Open table - std::unique_ptr table_reader; - OpenTable(options, writable->contents(), &table_reader); - const auto& properties = - table_reader->GetTableProperties().user_collected_properties; +void TestInternalKeyPropertiesCollector( + uint64_t magic_number, + bool sanitized, + std::shared_ptr table_factory) { + InternalKey keys[] = { + InternalKey("A ", 0, ValueType::kTypeValue), + InternalKey("B ", 0, ValueType::kTypeValue), + InternalKey("C ", 0, ValueType::kTypeValue), + InternalKey("W ", 0, ValueType::kTypeDeletion), + InternalKey("X ", 0, ValueType::kTypeDeletion), + InternalKey("Y ", 0, ValueType::kTypeDeletion), + InternalKey("Z ", 0, ValueType::kTypeDeletion), + }; - ASSERT_EQ("Rocksdb", properties.at("TablePropertiesTest")); + std::unique_ptr builder; + std::unique_ptr writable; + Options options; + options.table_factory = table_factory; + if (sanitized) { + options.table_properties_collectors = { + std::make_shared() + }; + // with sanitization, even regular properties collector will be able to + // handle internal keys. + auto comparator = options.comparator; + // HACK: Set options.info_log to avoid writing log in + // SanitizeOptions(). 
+ options.info_log = std::make_shared(); + options = SanitizeOptions( + "db", // just a place holder + nullptr, // with skip internal key comparator + nullptr, // don't care filter policy + options + ); + options.comparator = comparator; + } else { + options.table_properties_collectors = { + std::make_shared() + }; + } + MakeBuilder(options, &writable, &builder); + for (const auto& k : keys) { + builder->Add(k.Encode(), "val"); + } + + ASSERT_OK(builder->Finish()); + + FakeRandomeAccessFile readable(writable->contents()); + TableProperties props; + Status s = ReadTableProperties( + &readable, + writable->contents().size(), + magic_number, + Env::Default(), + nullptr, + &props + ); + ASSERT_OK(s); + + auto user_collected = props.user_collected_properties; + uint64_t deleted = GetDeletedKeys(user_collected); + ASSERT_EQ(4u, deleted); + + if (sanitized) { uint32_t starts_with_A = 0; - Slice key(properties.at("Count")); + Slice key(user_collected.at("Count")); ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); - ASSERT_EQ(3u, starts_with_A); + ASSERT_EQ(1u, starts_with_A); } } TEST(TablePropertiesTest, InternalKeyPropertiesCollector) { - InternalKey keys[] = { - InternalKey("A", 0, ValueType::kTypeValue), - InternalKey("B", 0, ValueType::kTypeValue), - InternalKey("C", 0, ValueType::kTypeValue), - InternalKey("W", 0, ValueType::kTypeDeletion), - InternalKey("X", 0, ValueType::kTypeDeletion), - InternalKey("Y", 0, ValueType::kTypeDeletion), - InternalKey("Z", 0, ValueType::kTypeDeletion), - }; - - for (bool sanitized : { false, true }) { - std::unique_ptr builder; - std::unique_ptr writable; - Options options; - if (sanitized) { - options.table_properties_collectors = { - std::make_shared() - }; - // with sanitization, even regular properties collector will be able to - // handle internal keys. - auto comparator = options.comparator; - // HACK: Set options.info_log to avoid writing log in - // SanitizeOptions(). 
- options.info_log = std::make_shared(); - options = SanitizeOptions( - "db", // just a place holder - nullptr, // with skip internal key comparator - nullptr, // don't care filter policy - options - ); - options.comparator = comparator; - } else { - options.table_properties_collectors = { - std::make_shared() - }; - } - - MakeBuilder(options, &writable, &builder); - for (const auto& k : keys) { - builder->Add(k.Encode(), "val"); - } - - ASSERT_OK(builder->Finish()); - - std::unique_ptr table_reader; - OpenTable(options, writable->contents(), &table_reader); - const auto& properties = - table_reader->GetTableProperties().user_collected_properties; - - uint64_t deleted = GetDeletedKeys(properties); - ASSERT_EQ(4u, deleted); - - if (sanitized) { - uint32_t starts_with_A = 0; - Slice key(properties.at("Count")); - ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); - ASSERT_EQ(1u, starts_with_A); - } - } + TestInternalKeyPropertiesCollector( + kBlockBasedTableMagicNumber, + true /* sanitize */, + std::make_shared() + ); + TestInternalKeyPropertiesCollector( + kBlockBasedTableMagicNumber, + false /* not sanitize */, + std::make_shared() + ); + TestInternalKeyPropertiesCollector( + kPlainTableMagicNumber, + false /* not sanitize */, + std::make_shared(8, 8, 0) + ); } } // namespace rocksdb diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 75c8bcc16..c2570acf6 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -64,6 +64,7 @@ struct TablePropertiesNames { static const std::string kFilterPolicy; }; +extern const std::string kPropertiesBlock; // `TablePropertiesCollector` provides the mechanism for users to collect // their own interested properties. 
This class is essentially a collection diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 61ac193c9..e81d99ede 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -387,7 +387,7 @@ Status BlockBasedTableBuilder::Finish() { &properties_block_handle ); - meta_index_builer.Add(BlockBasedTable::kPropertiesBlock, + meta_index_builer.Add(kPropertiesBlock, properties_block_handle); } // end of properties block writing @@ -459,7 +459,5 @@ uint64_t BlockBasedTableBuilder::FileSize() const { const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; -const std::string BlockBasedTable::kPropertiesBlock = - "rocksdb.properties"; } // namespace rocksdb diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 080daa5a7..dd6d0e7ae 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -21,6 +21,7 @@ #include "table/block.h" #include "table/filter_block.h" #include "table/format.h" +#include "table/meta_blocks.h" #include "table/two_level_iterator.h" #include "util/coding.h" @@ -250,10 +251,16 @@ Status BlockBasedTable::Open(const Options& options, // Read the properties meta_iter->Seek(kPropertiesBlock); - if (meta_iter->Valid() && meta_iter->key() == Slice(kPropertiesBlock)) { + if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) { s = meta_iter->status(); if (s.ok()) { - s = ReadProperties(meta_iter->value(), rep, &rep->table_properties); + s = ReadProperties( + meta_iter->value(), + rep->file.get(), + rep->options.env, + rep->options.info_log.get(), + &rep->table_properties + ); } if (!s.ok()) { @@ -401,96 +408,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter ( rep->options, block.data, block.heap_allocated); } -Status BlockBasedTable::ReadProperties( - const Slice& handle_value, Rep* rep, TableProperties* table_properties) { - assert(table_properties); - - Slice v = handle_value; - BlockHandle handle; - if 
(!handle.DecodeFrom(&v).ok()) { - return Status::InvalidArgument("Failed to decode properties block handle"); - } - - BlockContents block_contents; - Status s = ReadBlockContents( - rep->file.get(), - ReadOptions(), - handle, - &block_contents, - rep->options.env, - false - ); - - if (!s.ok()) { - return s; - } - - Block properties_block(block_contents); - std::unique_ptr iter( - properties_block.NewIterator(BytewiseComparator()) - ); - - // All pre-defined properties of type uint64_t - std::unordered_map predefined_uint64_properties = { - { TablePropertiesNames::kDataSize, - &table_properties->data_size }, - { TablePropertiesNames::kIndexSize, - &table_properties->index_size }, - { TablePropertiesNames::kFilterSize, - &table_properties->filter_size }, - { TablePropertiesNames::kRawKeySize, - &table_properties->raw_key_size }, - { TablePropertiesNames::kRawValueSize, - &table_properties->raw_value_size }, - { TablePropertiesNames::kNumDataBlocks, - &table_properties->num_data_blocks }, - { TablePropertiesNames::kNumEntries, - &table_properties->num_entries }, - }; - - std::string last_key; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - s = iter->status(); - if (!s.ok()) { - break; - } - - auto key = iter->key().ToString(); - // properties block is strictly sorted with no duplicate key. 
- assert( - last_key.empty() || - BytewiseComparator()->Compare(key, last_key) > 0 - ); - last_key = key; - - auto raw_val = iter->value(); - auto pos = predefined_uint64_properties.find(key); - - if (pos != predefined_uint64_properties.end()) { - // handle predefined rocksdb properties - uint64_t val; - if (!GetVarint64(&raw_val, &val)) { - // skip malformed value - auto error_msg = - "[Warning] detect malformed value in properties meta-block:" - "\tkey: " + key + "\tval: " + raw_val.ToString(); - Log(rep->options.info_log, "%s", error_msg.c_str()); - continue; - } - *(pos->second) = val; - } else if (key == TablePropertiesNames::kFilterPolicy) { - table_properties->filter_policy_name = raw_val.ToString(); - } else { - // handle user-collected - table_properties->user_collected_properties.insert( - std::make_pair(key, raw_val.ToString()) - ); - } - } - - return s; -} - Status BlockBasedTable::GetBlock( const BlockBasedTable* table, const BlockHandle& handle, diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 05811b5d3..3485a4534 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -38,7 +38,6 @@ using std::unique_ptr; class BlockBasedTable : public TableReader { public: static const std::string kFilterBlockPrefix; - static const std::string kPropertiesBlock; // Attempt to open the table that is stored in bytes [0..file_size) // of "file", and read the metadata entries necessary to allow @@ -142,7 +141,6 @@ class BlockBasedTable : public TableReader { void ReadMeta(const Footer& footer); void ReadFilter(const Slice& filter_handle_value); - static Status ReadProperties(const Slice& handle_value, Rep* rep); // Read the meta block from sst. static Status ReadMetaBlock( @@ -156,10 +154,6 @@ class BlockBasedTable : public TableReader { Rep* rep, size_t* filter_size = nullptr); - // Read the table properties from properties block. 
- static Status ReadProperties( - const Slice& handle_value, Rep* rep, TableProperties* properties); - static void SetupCacheKeyPrefix(Rep* rep); explicit BlockBasedTable(Rep* rep) : diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index df3ee5dae..95eb6c4ab 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -8,6 +8,7 @@ #include #include "rocksdb/table_properties.h" +#include "table/block.h" #include "table/format.h" #include "util/coding.h" @@ -131,4 +132,146 @@ bool NotifyCollectTableCollectorsOnFinish( return all_succeeded; } +Status ReadProperties( + const Slice& handle_value, + RandomAccessFile* file, + Env* env, + Logger* logger, + TableProperties* table_properties) { + assert(table_properties); + + Slice v = handle_value; + BlockHandle handle; + if (!handle.DecodeFrom(&v).ok()) { + return Status::InvalidArgument("Failed to decode properties block handle"); + } + + BlockContents block_contents; + Status s = ReadBlockContents( + file, + ReadOptions(), + handle, + &block_contents, + env, + false + ); + + if (!s.ok()) { + return s; + } + + Block properties_block(block_contents); + std::unique_ptr iter( + properties_block.NewIterator(BytewiseComparator()) + ); + + // All pre-defined properties of type uint64_t + std::unordered_map predefined_uint64_properties = { + { TablePropertiesNames::kDataSize, &table_properties->data_size }, + { TablePropertiesNames::kIndexSize, &table_properties->index_size }, + { TablePropertiesNames::kFilterSize, &table_properties->filter_size }, + { TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size }, + { TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size }, + { TablePropertiesNames::kNumDataBlocks, + &table_properties->num_data_blocks }, + { TablePropertiesNames::kNumEntries, &table_properties->num_entries }, + }; + + std::string last_key; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + s = iter->status(); + if (!s.ok()) { + break; + } + + auto key = 
iter->key().ToString(); + // properties block is strictly sorted with no duplicate key. + assert( + last_key.empty() || + BytewiseComparator()->Compare(key, last_key) > 0 + ); + last_key = key; + + auto raw_val = iter->value(); + auto pos = predefined_uint64_properties.find(key); + + if (pos != predefined_uint64_properties.end()) { + // handle predefined rocksdb properties + uint64_t val; + if (!GetVarint64(&raw_val, &val)) { + // skip malformed value + auto error_msg = + "[Warning] detect malformed value in properties meta-block:" + "\tkey: " + key + "\tval: " + raw_val.ToString(); + Log(logger, "%s", error_msg.c_str()); + continue; + } + *(pos->second) = val; + } else if (key == TablePropertiesNames::kFilterPolicy) { + table_properties->filter_policy_name = raw_val.ToString(); + } else { + // handle user-collected properties + table_properties->user_collected_properties.insert( + std::make_pair(key, raw_val.ToString()) + ); + } + } + + return s; +} + +Status ReadTableProperties( + RandomAccessFile* file, + uint64_t file_size, + uint64_t table_magic_number, + Env* env, + Logger* info_log, + TableProperties* properties) { + // -- Read metaindex block + Footer footer(table_magic_number); + auto s = ReadFooterFromFile(file, file_size, &footer); + if (!s.ok()) { + return s; + } + + auto metaindex_handle = footer.metaindex_handle(); + BlockContents metaindex_contents; + s = ReadBlockContents( + file, + ReadOptions(), + metaindex_handle, + &metaindex_contents, + env, + false + ); + if (!s.ok()) { + return s; + } + Block metaindex_block(metaindex_contents); + std::unique_ptr meta_iter( + metaindex_block.NewIterator(BytewiseComparator()) + ); + + // -- Read property block + meta_iter->Seek(kPropertiesBlock); + if (meta_iter->Valid() && + meta_iter->key() == kPropertiesBlock && + meta_iter->status().ok()) { + s = ReadProperties( + meta_iter->value(), + file, + env, + info_log, + properties + ); + } else { + s = Status::Corruption( + 
"Unable to read the property block from the table" + ); + } + + return s; +} + + } // namespace rocksdb diff --git a/table/meta_blocks.h b/table/meta_blocks.h index d0718ec07..a773c7b38 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -15,9 +15,11 @@ namespace rocksdb { -class BlockHandle; class BlockBuilder; +class BlockHandle; +class Env; class Logger; +class RandomAccessFile; struct TableProperties; // An STL style comparator that does the bytewise comparator comparasion @@ -49,11 +51,6 @@ class MetaIndexBuilder { Slice Finish(); private: - // * Key: meta block name - // * Value: block handle to that meta block - struct Rep; - Rep* rep_; - // store the sorted key/handle of the metablocks. BytewiseSortedMap meta_block_handles_; std::unique_ptr meta_index_block_; @@ -103,4 +100,21 @@ bool NotifyCollectTableCollectorsOnFinish( Logger* info_log, PropertyBlockBuilder* builder); +// Read the properties from the table. +Status ReadProperties( + const Slice& handle_value, + RandomAccessFile* file, + Env* env, + Logger* logger, + TableProperties* table_properties); + +// Directly read the properties from the properties block of a table file. +Status ReadTableProperties( + RandomAccessFile* file, + uint64_t file_size, + uint64_t table_magic_number, + Env* env, + Logger* info_log, + TableProperties* properties); + } // namespace rocksdb diff --git a/table/plain_table_builder.cc b/table/plain_table_builder.cc index 30d7e7d6e..970599a9b 100644 --- a/table/plain_table_builder.cc +++ b/table/plain_table_builder.cc @@ -14,12 +14,40 @@ #include "table/block_builder.h" #include "table/filter_block.h" #include "table/format.h" +#include "table/meta_blocks.h" #include "util/coding.h" #include "util/crc32c.h" #include "util/stop_watch.h" namespace rocksdb { +namespace { + +// a utility that helps writing block content to the file +// @offset will advance if @block_contents was successfully written. +// @block_handle receives the offset and size of this particular block. 
+Status WriteBlock( + const Slice& block_contents, + WritableFile* file, + uint64_t* offset, + BlockHandle* block_handle) { + block_handle->set_offset(*offset); + block_handle->set_size(block_contents.size()); + Status s = file->Append(block_contents); + + if (s.ok()) { + *offset += block_contents.size(); + } + return s; +} + +} // namespace + +// kPlainTableMagicNumber was picked by running +// echo rocksdb.plain.table | sha1sum +// and taking the leading 64 bits. +extern const uint64_t kPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull; + PlainTableBuilder::PlainTableBuilder(const Options& options, WritableFile* file, int user_key_size, int key_prefix_len) : @@ -28,15 +56,18 @@ PlainTableBuilder::PlainTableBuilder(const Options& options, PutFixed32(&version, 1 | 0x80000000); file_->Append(Slice(version)); offset_ = 4; + + // for plain table, we put all the data in a big chunk. + properties_.num_data_blocks = 1; + // emphasize that currently plain table doesn't have persistent index or + // filter block. 
+ properties_.index_size = 0; + properties_.filter_size = 0; } PlainTableBuilder::~PlainTableBuilder() { } -Status PlainTableBuilder::ChangeOptions(const Options& options) { - return Status::OK(); -} - void PlainTableBuilder::Add(const Slice& key, const Slice& value) { assert((int) key.size() == GetInternalKeyLength()); @@ -52,7 +83,17 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { file_->Append(value); offset_ += value_size + size.length(); - num_entries_++; + properties_.num_entries++; + properties_.raw_key_size += key.size(); + properties_.raw_value_size += value.size(); + + // notify property collectors + NotifyCollectTableCollectorsOnAdd( + key, + value, + options_.table_properties_collectors, + options_.info_log.get() + ); } Status PlainTableBuilder::status() const { @@ -62,7 +103,63 @@ Status PlainTableBuilder::status() const { Status PlainTableBuilder::Finish() { assert(!closed_); closed_ = true; - return Status::OK(); + + properties_.data_size = offset_; + + // Write the following blocks + // 1. [meta block: properties] + // 2. [metaindex block] + // 3. 
[footer] + MetaIndexBuilder meta_index_builer; + + PropertyBlockBuilder property_block_builder; + // -- Add basic properties + property_block_builder.AddTableProperty(properties_); + + // -- Add user collected properties + NotifyCollectTableCollectorsOnFinish( + options_.table_properties_collectors, + options_.info_log.get(), + &property_block_builder + ); + + // -- Write property block + BlockHandle property_block_handle; + auto s = WriteBlock( + property_block_builder.Finish(), + file_, + &offset_, + &property_block_handle + ); + if (!s.ok()) { + return s; + } + meta_index_builer.Add(kPropertiesBlock, property_block_handle); + + // -- write metaindex block + BlockHandle metaindex_block_handle; + s = WriteBlock( + meta_index_builer.Finish(), + file_, + &offset_, + &metaindex_block_handle + ); + if (!s.ok()) { + return s; + } + + // Write Footer + Footer footer(kPlainTableMagicNumber); + footer.set_metaindex_handle(metaindex_block_handle); + footer.set_index_handle(BlockHandle::NullBlockHandle()); + std::string footer_encoding; + footer.EncodeTo(&footer_encoding); + s = file_->Append(footer_encoding); + if (s.ok()) { + offset_ += footer_encoding.size(); + } + + return s; } void PlainTableBuilder::Abandon() { @@ -70,7 +167,7 @@ void PlainTableBuilder::Abandon() { } uint64_t PlainTableBuilder::NumEntries() const { - return num_entries_; + return properties_.num_entries; } uint64_t PlainTableBuilder::FileSize() const { diff --git a/table/plain_table_builder.h b/table/plain_table_builder.h index a994b337c..b8a2bbe3b 100644 --- a/table/plain_table_builder.h +++ b/table/plain_table_builder.h @@ -32,14 +32,6 @@ public: // REQUIRES: Either Finish() or Abandon() has been called. ~PlainTableBuilder(); - // Change the options used by this builder. Note: only some of the - // option fields can be changed after construction. 
If a field is - // not allowed to change dynamically and its value in the structure - // passed to the constructor is different from its value in the - // structure passed to this method, this method will return an error - // without changing any fields. - Status ChangeOptions(const Options& options); - // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. // REQUIRES: Finish(), Abandon() have not been called @@ -72,7 +64,7 @@ private: WritableFile* file_; uint64_t offset_ = 0; Status status_; - uint64_t num_entries_ = 0; + TableProperties properties_; const size_t user_key_size_; bool closed_ = false; // Either Finish() or Abandon() has been called. diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index e808948ab..1562f7cfd 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -19,6 +19,7 @@ #include "table/block.h" #include "table/filter_block.h" #include "table/format.h" +#include "table/meta_blocks.h" #include "table/two_level_iterator.h" #include "util/coding.h" @@ -41,6 +42,7 @@ public: namespace rocksdb { +extern const uint64_t kPlainTableMagicNumber; static uint32_t getBucketId(Slice const& s, size_t prefix_len, uint32_t num_buckets) { return MurmurHash(s.data(), prefix_len, 397) % num_buckets; @@ -49,18 +51,16 @@ static uint32_t getBucketId(Slice const& s, size_t prefix_len, PlainTableReader::PlainTableReader(const EnvOptions& storage_options, uint64_t file_size, int user_key_size, int key_prefix_len, int bloom_bits_per_key, - double hash_table_ratio) : + double hash_table_ratio, + const TableProperties& table_properties) : hash_table_size_(0), soptions_(storage_options), file_size_(file_size), user_key_size_(user_key_size), key_prefix_len_(key_prefix_len), - hash_table_ratio_(hash_table_ratio) { - if (bloom_bits_per_key > 0) { - filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key); - } else { - filter_policy_ = nullptr; - } - 
hash_table_ = nullptr; - data_start_offset_ = 0; - data_end_offset_ = file_size; + hash_table_ratio_(hash_table_ratio), + filter_policy_(bloom_bits_per_key > 0 ? + NewBloomFilterPolicy(bloom_bits_per_key) : nullptr), + table_properties_(table_properties), + data_start_offset_(0), + data_end_offset_(table_properties_.data_size) { } PlainTableReader::~PlainTableReader() { @@ -87,19 +87,38 @@ Status PlainTableReader::Open(const Options& options, return Status::NotSupported("File is too large for PlainTableReader!"); } - PlainTableReader* t = new PlainTableReader(soptions, file_size, - user_key_size, - key_prefix_len, - bloom_num_bits, - hash_table_ratio); - t->file_ = std::move(file); - t->options_ = options; - Status s = t->PopulateIndex(file_size); + TableProperties table_properties; + auto s = ReadTableProperties( + file.get(), + file_size, + kPlainTableMagicNumber, + options.env, + options.info_log.get(), + &table_properties + ); if (!s.ok()) { - delete t; return s; } - table_reader->reset(t); + + std::unique_ptr new_reader(new PlainTableReader( + soptions, + file_size, + user_key_size, + key_prefix_len, + bloom_num_bits, + hash_table_ratio, + table_properties + )); + new_reader->file_ = std::move(file); + new_reader->options_ = options; + + // -- Populate Index + s = new_reader->PopulateIndex(); + if (!s.ok()) { + return s; + } + + *table_reader = std::move(new_reader); return s; } @@ -114,7 +133,7 @@ Iterator* PlainTableReader::NewIterator(const ReadOptions& options) { return new PlainTableIterator(this); } -Status PlainTableReader::PopulateIndex(uint64_t file_size) { +Status PlainTableReader::PopulateIndex() { // Get mmapped memory to file_data_. 
Status s = file_->Read(0, file_size_, &file_data_, nullptr); if (!s.ok()) { @@ -124,7 +143,6 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { version_ ^= 0x80000000; assert(version_ == 1); data_start_offset_ = 4; - data_end_offset_ = file_size; Slice key_slice; Slice key_prefix_slice; @@ -140,7 +158,7 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { // are in order. std::vector> prefix_index_pairs; std::string current_prefix_index; - while (pos < file_size) { + while (pos < data_end_offset_) { uint32_t key_offset = pos; status_ = Next(pos, &key_slice, &value_slice, pos); key_prefix_slice = Slice(key_slice.data(), key_prefix_len_); diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index eea8adfe6..26a506d14 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -78,16 +78,21 @@ public: void SetupForCompaction(); TableProperties& GetTableProperties() { - return tbl_props; + return table_properties_; } - PlainTableReader(const EnvOptions& storage_options, uint64_t file_size, - int user_key_size, int key_prefix_len, int bloom_num_bits, - double hash_table_ratio); + PlainTableReader( + const EnvOptions& storage_options, + uint64_t file_size, + int user_key_size, + int key_prefix_len, + int bloom_num_bits, + double hash_table_ratio, + const TableProperties& table_properties); ~PlainTableReader(); private: - uint32_t* hash_table_; + uint32_t* hash_table_ = nullptr; int hash_table_size_; std::string sub_index_; @@ -99,8 +104,6 @@ private: Slice file_data_; uint32_t version_; uint32_t file_size_; - uint32_t data_start_offset_; - uint32_t data_end_offset_; const size_t user_key_size_; const size_t key_prefix_len_; const double hash_table_ratio_; @@ -108,7 +111,9 @@ private: std::string filter_str_; Slice filter_slice_; - TableProperties tbl_props; + TableProperties table_properties_; + uint32_t data_start_offset_; + uint32_t data_end_offset_; static const size_t kNumInternalBytes = 8; static const uint32_t 
kSubIndexMask = 0x80000000; @@ -125,7 +130,7 @@ private: // any query to the table. // This query will populate the hash table hash_table_, the second // level of indexes sub_index_ and bloom filter filter_slice_ if enabled. - Status PopulateIndex(uint64_t file_size); + Status PopulateIndex(); // Check bloom filter to see whether it might contain this prefix bool MayHavePrefix(const Slice& target_prefix); diff --git a/table/table_properties.cc b/table/table_properties.cc index 2c9905884..47e7f8b33 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -105,4 +105,6 @@ const std::string TablePropertiesNames::kNumEntries = const std::string TablePropertiesNames::kFilterPolicy = "rocksdb.filter.policy"; +extern const std::string kPropertiesBlock = "rocksdb.properties"; + } // namespace rocksdb diff --git a/table/table_test.cc b/table/table_test.cc index 1f79fcdf9..7711ed8ad 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -15,17 +15,22 @@ #include "db/db_statistics.h" #include "db/memtable.h" #include "db/write_batch_internal.h" + #include "rocksdb/cache.h" #include "rocksdb/db.h" +#include "rocksdb/plain_table_factory.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/memtablerep.h" + +#include "table/meta_blocks.h" #include "table/block_based_table_builder.h" #include "table/block_based_table_factory.h" #include "table/block_based_table_reader.h" #include "table/block_builder.h" #include "table/block.h" #include "table/format.h" + #include "util/random.h" #include "util/testharness.h" #include "util/testutil.h" @@ -743,49 +748,6 @@ class Harness { Constructor* constructor_; }; -// Test the empty key -TEST(Harness, SimpleEmptyKey) { - std::vector args = GenerateArgList(); - for (unsigned int i = 0; i < args.size(); i++) { - Init(args[i]); - Random rnd(test::RandomSeed() + 1); - Add("", "v"); - Test(&rnd); - } -} - -TEST(Harness, SimpleSingle) { - std::vector args = GenerateArgList(); - for (unsigned int i = 0; 
i < args.size(); i++) { - Init(args[i]); - Random rnd(test::RandomSeed() + 2); - Add("abc", "v"); - Test(&rnd); - } -} - -TEST(Harness, SimpleMulti) { - std::vector args = GenerateArgList(); - for (unsigned int i = 0; i < args.size(); i++) { - Init(args[i]); - Random rnd(test::RandomSeed() + 3); - Add("abc", "v"); - Add("abcd", "v"); - Add("ac", "v2"); - Test(&rnd); - } -} - -TEST(Harness, SimpleSpecialKey) { - std::vector args = GenerateArgList(); - for (unsigned int i = 0; i < args.size(); i++) { - Init(args[i]); - Random rnd(test::RandomSeed() + 4); - Add("\xff\xff", "v3"); - Test(&rnd); - } -} - static bool Between(uint64_t val, uint64_t low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { @@ -801,7 +763,7 @@ class TableTest { }; // This test include all the basic checks except those for index size and block // size, which will be conducted in separated unit tests. -TEST(TableTest, BasicTableProperties) { +TEST(TableTest, BasicBlockedBasedTableProperties) { BlockBasedTableConstructor c(BytewiseComparator()); c.Add("a1", "val1"); @@ -845,6 +807,47 @@ TEST(TableTest, BasicTableProperties) { ); } +extern const uint64_t kPlainTableMagicNumber; +TEST(TableTest, BasicPlainTableProperties) { + PlainTableFactory factory(8, 8, 0); + StringSink sink; + std::unique_ptr builder(factory.GetTableBuilder( + Options(), + &sink, + kNoCompression + )); + + for (char c = 'a'; c <= 'z'; ++c) { + std::string key(16, c); + std::string value(28, c + 42); + builder->Add(key, value); + } + ASSERT_OK(builder->Finish()); + + StringSource source(sink.contents(), 72242); + + TableProperties props; + auto s = ReadTableProperties( + &source, + sink.contents().size(), + kPlainTableMagicNumber, + Env::Default(), + nullptr, + &props + ); + ASSERT_OK(s); + + ASSERT_EQ(0ul, props.index_size); + ASSERT_EQ(0ul, props.filter_size); + ASSERT_EQ(16ul * 26, props.raw_key_size); + ASSERT_EQ(28ul * 26, props.raw_value_size); + ASSERT_EQ(26ul, props.num_entries); + 
ASSERT_EQ(1ul, props.num_data_blocks); + + // User collected keys + // internal keys +} + TEST(TableTest, FilterPolicyNameProperties) { BlockBasedTableConstructor c(BytewiseComparator()); c.Add("a1", "val1"); @@ -1292,6 +1295,48 @@ TEST(MemTableTest, Simple) { delete memtable->Unref(); } +// Test the empty key +TEST(Harness, SimpleEmptyKey) { + std::vector args = GenerateArgList(); + for (unsigned int i = 0; i < args.size(); i++) { + Init(args[i]); + Random rnd(test::RandomSeed() + 1); + Add("", "v"); + Test(&rnd); + } +} + +TEST(Harness, SimpleSingle) { + std::vector args = GenerateArgList(); + for (unsigned int i = 0; i < args.size(); i++) { + Init(args[i]); + Random rnd(test::RandomSeed() + 2); + Add("abc", "v"); + Test(&rnd); + } +} + +TEST(Harness, SimpleMulti) { + std::vector args = GenerateArgList(); + for (unsigned int i = 0; i < args.size(); i++) { + Init(args[i]); + Random rnd(test::RandomSeed() + 3); + Add("abc", "v"); + Add("abcd", "v"); + Add("ac", "v2"); + Test(&rnd); + } +} + +TEST(Harness, SimpleSpecialKey) { + std::vector args = GenerateArgList(); + for (unsigned int i = 0; i < args.size(); i++) { + Init(args[i]); + Random rnd(test::RandomSeed() + 4); + Add("\xff\xff", "v3"); + Test(&rnd); + } +} } // namespace rocksdb