From 161ab42a8a6031b4ec8dc3ee1e0eefa731305c21 Mon Sep 17 00:00:00 2001 From: kailiu Date: Fri, 7 Feb 2014 19:26:49 -0800 Subject: [PATCH] Make table properties shareable Summary: We are going to expose properties of all tables to end users through "some" db interface. However, current design doesn't naturally fit for this need, which is because: 1. If a table presents in table cache, we cannot simply return the reference to its table properties, because the table may be destroy after compaction (and we don't want to hold the ref of the version). 2. Copy table properties is OK, but it's slow. Thus in this diff, I change the table reader's interface to return a shared pointer (for const table properties), instead a const refernce. Test Plan: `make check` passed Reviewers: haobo, sdong, dhruba Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D15999 --- db/simple_table_db_test.cc | 7 ++-- db/table_properties_collector_test.cc | 10 +++--- table/block_based_table_reader.cc | 10 ++++-- table/block_based_table_reader.h | 2 +- table/format.cc | 4 ++- table/meta_blocks.cc | 50 ++++++++++++++------------- table/meta_blocks.h | 25 +++++++------- table/plain_table_reader.cc | 17 +++++---- table/plain_table_reader.h | 8 +++-- table/table_reader.h | 3 +- table/table_test.cc | 24 ++++++------- tools/sst_dump.cc | 26 ++++++++------ 12 files changed, 102 insertions(+), 84 deletions(-) diff --git a/db/simple_table_db_test.cc b/db/simple_table_db_test.cc index 3d1420c0c..a67114663 100644 --- a/db/simple_table_db_test.cc +++ b/db/simple_table_db_test.cc @@ -96,7 +96,7 @@ public: void SetupForCompaction() override; - TableProperties& GetTableProperties() override; + std::shared_ptr GetTableProperties() const override; ~SimpleTableReader(); @@ -172,7 +172,7 @@ struct SimpleTableReader::Rep { unique_ptr file; uint64_t index_start_offset; int num_entries; - TableProperties table_properties; + std::shared_ptr table_properties; const static int user_key_size = 16; const static int offset_length = 8; @@ -215,7 +215,8 @@ Status SimpleTableReader::Open(const Options& options, void SimpleTableReader::SetupForCompaction() { } -TableProperties& SimpleTableReader::GetTableProperties() { +std::shared_ptr SimpleTableReader::GetTableProperties() + const { return rep_->table_properties; } diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 961a7302b..28e0a75de 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -157,7 +157,7 @@ void TestCustomizedTablePropertiesCollector( // -- Step 2: Read properties FakeRandomeAccessFile readable(writable->contents()); - TableProperties props; + TableProperties* props; Status s = ReadTableProperties( &readable, writable->contents().size(), @@ -166,9 +166,10 @@ void TestCustomizedTablePropertiesCollector( nullptr, &props ); + std::unique_ptr props_guard(props); ASSERT_OK(s); - auto user_collected = props.user_collected_properties; + auto user_collected = props->user_collected_properties; ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest")); @@ -256,7 +257,7 @@ void TestInternalKeyPropertiesCollector( ASSERT_OK(builder->Finish()); FakeRandomeAccessFile readable(writable->contents()); - TableProperties props; + TableProperties* props; Status s = ReadTableProperties( &readable, writable->contents().size(), @@ -267,7 +268,8 @@ void TestInternalKeyPropertiesCollector( ); ASSERT_OK(s); - auto user_collected = props.user_collected_properties; + std::unique_ptr props_guard(props); + auto user_collected = props->user_collected_properties; uint64_t deleted = GetDeletedKeys(user_collected); ASSERT_EQ(4u, deleted); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index f4dd5b2ec..88ec65557 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -62,7 +62,7 @@ struct BlockBasedTable::Rep { unique_ptr index_block; unique_ptr filter; - TableProperties table_properties; + std::shared_ptr table_properties; }; BlockBasedTable::~BlockBasedTable() { @@ -255,9 +255,10 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, meta_iter->Seek(kPropertiesBlock); if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) { s = meta_iter->status(); + TableProperties* table_properties = nullptr; if (s.ok()) { s = ReadProperties(meta_iter->value(), rep->file.get(), rep->options.env, - rep->options.info_log.get(), &rep->table_properties); + rep->options.info_log.get(), &table_properties); } if (!s.ok()) { @@ -265,6 +266,8 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, "[Warning] Encountered error while reading data from properties " "block " + s.ToString(); Log(rep->options.info_log, "%s", err_msg.c_str()); + } else { + rep->table_properties.reset(table_properties); } } @@ -339,7 +342,8 @@ void BlockBasedTable::SetupForCompaction() { compaction_optimized_ = true; } -const TableProperties& BlockBasedTable::GetTableProperties() { +std::shared_ptr BlockBasedTable::GetTableProperties() + const { return rep_->table_properties; } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 58e5b0716..c711e7036 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -86,7 +86,7 @@ class BlockBasedTable : public TableReader { // posix_fadvise void SetupForCompaction() override; - const TableProperties& GetTableProperties() override; + std::shared_ptr GetTableProperties() const override; ~BlockBasedTable(); diff --git a/table/format.cc b/table/format.cc index 561d1689a..3d48b421d 100644 --- a/table/format.cc +++ b/table/format.cc @@ -10,6 +10,7 @@ #include "table/format.h" #include +#include #include "port/port.h" #include "rocksdb/env.h" @@ -64,7 +65,8 @@ Status Footer::DecodeFrom(Slice* input) { if (magic != table_magic_number()) { char buffer[80]; snprintf(buffer, sizeof(buffer) - 1, - "not an sstable (bad magic number --- %lx)", magic); + "not an sstable (bad magic number --- %#" PRIx64 ")", + magic); return Status::InvalidArgument(buffer); } } else { diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index fac84a01c..fa84b5a38 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -133,12 +133,9 @@ bool NotifyCollectTableCollectorsOnFinish( return all_succeeded; } -Status ReadProperties( - const Slice& handle_value, - RandomAccessFile* file, - Env* env, - Logger* logger, - TableProperties* table_properties) { +Status ReadProperties(const Slice& handle_value, RandomAccessFile* file, + Env* env, Logger* logger, + TableProperties** table_properties) { assert(table_properties); Slice v = handle_value; @@ -161,18 +158,22 @@ Status ReadProperties( std::unique_ptr iter( properties_block.NewIterator(BytewiseComparator())); + auto new_table_properties = new TableProperties(); // All pre-defined properties of type uint64_t std::unordered_map predefined_uint64_properties = { - {TablePropertiesNames::kDataSize, &table_properties->data_size}, - {TablePropertiesNames::kIndexSize, &table_properties->index_size}, - {TablePropertiesNames::kFilterSize, &table_properties->filter_size}, - {TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size}, - {TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size}, + {TablePropertiesNames::kDataSize, &new_table_properties->data_size}, + {TablePropertiesNames::kIndexSize, &new_table_properties->index_size}, + {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size}, + {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size}, + {TablePropertiesNames::kRawValueSize, + &new_table_properties->raw_value_size}, {TablePropertiesNames::kNumDataBlocks, - &table_properties->num_data_blocks}, - {TablePropertiesNames::kNumEntries, &table_properties->num_entries}, - {TablePropertiesNames::kFormatVersion, &table_properties->format_version}, - {TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len}}; + &new_table_properties->num_data_blocks}, + {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, + {TablePropertiesNames::kFormatVersion, + &new_table_properties->format_version}, + {TablePropertiesNames::kFixedKeyLen, + &new_table_properties->fixed_key_len}, }; std::string last_key; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { @@ -203,24 +204,25 @@ Status ReadProperties( } *(pos->second) = val; } else if (key == TablePropertiesNames::kFilterPolicy) { - table_properties->filter_policy_name = raw_val.ToString(); + new_table_properties->filter_policy_name = raw_val.ToString(); } else { // handle user-collected properties - table_properties->user_collected_properties.insert( + new_table_properties->user_collected_properties.insert( {key, raw_val.ToString()}); } } + if (s.ok()) { + *table_properties = new_table_properties; + } else { + delete new_table_properties; + } return s; } -Status ReadTableProperties( - RandomAccessFile* file, - uint64_t file_size, - uint64_t table_magic_number, - Env* env, - Logger* info_log, - TableProperties* properties) { +Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size, + uint64_t table_magic_number, Env* env, + Logger* info_log, TableProperties** properties) { // -- Read metaindex block Footer footer(table_magic_number); auto s = ReadFooterFromFile(file, file_size, &footer); diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 8994b01f3..f74e66592 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -103,21 +103,20 @@ bool NotifyCollectTableCollectorsOnFinish( PropertyBlockBuilder* builder); // Read the properties from the table. -Status ReadProperties( - const Slice& handle_value, - RandomAccessFile* file, - Env* env, - Logger* logger, - TableProperties* table_properties); +// @returns a status to indicate if the operation succeeded. On success, +// *table_properties will point to a heap-allocated TableProperties +// object, otherwise value of `table_properties` will not be modified. +Status ReadProperties(const Slice& handle_value, RandomAccessFile* file, + Env* env, Logger* logger, + TableProperties** table_properties); // Directly read the properties from the properties block of a plain table. -Status ReadTableProperties( - RandomAccessFile* file, - uint64_t file_size, - uint64_t table_magic_number, - Env* env, - Logger* info_log, - TableProperties* properties); +// @returns a status to indicate if the operation succeeded. On success, +// *table_properties will point to a heap-allocated TableProperties +// object, otherwise value of `table_properties` will not be modified. +Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size, + uint64_t table_magic_number, Env* env, + Logger* info_log, TableProperties** properties); // Read the magic number of the specified file directly. The magic number // of a valid sst table the last 8-byte of the file. diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index b07862bad..cf1025097 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -87,15 +87,15 @@ PlainTableReader::PlainTableReader(const EnvOptions& storage_options, const InternalKeyComparator& icomparator, uint64_t file_size, int bloom_bits_per_key, double hash_table_ratio, - const TableProperties& table_properties) + const TableProperties* table_properties) : soptions_(storage_options), internal_comparator_(icomparator), file_size_(file_size), kHashTableRatio(hash_table_ratio), kBloomBitsPerKey(bloom_bits_per_key), table_properties_(table_properties), - data_end_offset_(table_properties_.data_size), - user_key_len_(table_properties.fixed_key_len) {} + data_end_offset_(table_properties_->data_size), + user_key_len_(table_properties->fixed_key_len) {} PlainTableReader::~PlainTableReader() { delete[] hash_table_; @@ -117,17 +117,16 @@ Status PlainTableReader::Open(const Options& options, return Status::NotSupported("File is too large for PlainTableReader!"); } - TableProperties table_properties; + TableProperties* props = nullptr; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, - options.env, options.info_log.get(), - &table_properties); + options.env, options.info_log.get(), &props); if (!s.ok()) { return s; } - std::unique_ptr new_reader(new PlainTableReader( - soptions, internal_comparator, file_size, bloom_bits_per_key, - hash_table_ratio, table_properties)); + std::unique_ptr new_reader( + new PlainTableReader(soptions, internal_comparator, file_size, + bloom_bits_per_key, hash_table_ratio, props)); new_reader->file_ = std::move(file); new_reader->options_ = options; diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index 1abe4e35c..dd7b1e50f 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -64,13 +64,15 @@ class PlainTableReader: public TableReader { void SetupForCompaction(); - const TableProperties& GetTableProperties() { return table_properties_; } + std::shared_ptr GetTableProperties() const { + return table_properties_; + } PlainTableReader(const EnvOptions& storage_options, const InternalKeyComparator& internal_comparator, uint64_t file_size, int bloom_num_bits, double hash_table_ratio, - const TableProperties& table_properties); + const TableProperties* table_properties); ~PlainTableReader(); private: @@ -95,7 +97,7 @@ class PlainTableReader: public TableReader { const int kBloomBitsPerKey; DynamicBloom* bloom_ = nullptr; - TableProperties table_properties_; + std::shared_ptr table_properties_; const uint32_t data_start_offset_ = 0; const uint32_t data_end_offset_; const size_t user_key_len_; diff --git a/table/table_reader.h b/table/table_reader.h index 9acbb33d0..3d2738c9c 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once +#include namespace rocksdb { @@ -47,7 +48,7 @@ class TableReader { // posix_fadvise virtual void SetupForCompaction() = 0; - virtual const TableProperties& GetTableProperties() = 0; + virtual std::shared_ptr GetTableProperties() const = 0; // Calls (*result_handler)(handle_context, ...) repeatedly, starting with // the entry found after a call to Seek(key), until result_handler returns diff --git a/table/table_test.cc b/table/table_test.cc index 34a1932a8..965ecaa8d 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -932,7 +932,7 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) { c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); - auto& props = c.table_reader()->GetTableProperties(); + auto& props = *c.table_reader()->GetTableProperties(); ASSERT_EQ(kvmap.size(), props.num_entries); auto raw_key_size = kvmap.size() * 2ul; @@ -963,7 +963,7 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) { c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); - auto& props = c.table_reader()->GetTableProperties(); + auto& props = *c.table_reader()->GetTableProperties(); ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name); } @@ -1005,8 +1005,7 @@ TEST(BlockBasedTableTest, IndexSizeStat) { c.Finish(options, GetPlainInternalComparator(options.comparator), &ks, &kvmap); - auto index_size = - c.table_reader()->GetTableProperties().index_size; + auto index_size = c.table_reader()->GetTableProperties()->index_size; ASSERT_GT(index_size, last_index_size); last_index_size = index_size; } @@ -1031,7 +1030,7 @@ TEST(BlockBasedTableTest, NumBlockStat) { c.Finish(options, GetPlainInternalComparator(options.comparator), &ks, &kvmap); ASSERT_EQ(kvmap.size(), - c.table_reader()->GetTableProperties().num_data_blocks); + c.table_reader()->GetTableProperties()->num_data_blocks); } class BlockCacheProperties { @@ -1237,18 +1236,19 @@ TEST(PlainTableTest, BasicPlainTableProperties) { StringSource source(sink.contents(), 72242, true); - TableProperties props; + TableProperties* props = nullptr; + std::unique_ptr props_guard; auto s = ReadTableProperties(&source, sink.contents().size(), kPlainTableMagicNumber, Env::Default(), nullptr, &props); ASSERT_OK(s); - ASSERT_EQ(0ul, props.index_size); - ASSERT_EQ(0ul, props.filter_size); - ASSERT_EQ(16ul * 26, props.raw_key_size); - ASSERT_EQ(28ul * 26, props.raw_value_size); - ASSERT_EQ(26ul, props.num_entries); - ASSERT_EQ(1ul, props.num_data_blocks); + ASSERT_EQ(0ul, props->index_size); + ASSERT_EQ(0ul, props->filter_size); + ASSERT_EQ(16ul * 26, props->raw_key_size); + ASSERT_EQ(28ul * 26, props->raw_value_size); + ASSERT_EQ(26ul, props->num_entries); + ASSERT_EQ(1ul, props->num_data_blocks); } TEST(GeneralTableTest, ApproximateOffsetOfPlain) { diff --git a/tools/sst_dump.cc b/tools/sst_dump.cc index 3b82571bf..6ad08c8a8 100644 --- a/tools/sst_dump.cc +++ b/tools/sst_dump.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include "db/dbformat.h" #include "db/memtable.h" @@ -43,7 +44,8 @@ class SstFileReader { bool has_to, const std::string& to_key); - Status ReadTableProperties(TableProperties* table_properties); + Status ReadTableProperties( + std::shared_ptr* table_properties); uint64_t GetReadNumber() { return read_num_; } private: @@ -112,10 +114,11 @@ Status SstFileReader::NewTableReader(const std::string& file_path) { Status SstFileReader::SetTableOptionsByMagicNumber(uint64_t table_magic_number, RandomAccessFile* file, uint64_t file_size) { - TableProperties table_properties; + TableProperties* table_properties; Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number, options_.env, options_.info_log.get(), &table_properties); + std::unique_ptr props_guard(table_properties); if (!s.ok()) { return s; } @@ -126,13 +129,14 @@ Status SstFileReader::SetTableOptionsByMagicNumber(uint64_t table_magic_number, } else if (table_magic_number == kPlainTableMagicNumber) { options_.allow_mmap_reads = true; options_.table_factory = std::make_shared( - table_properties.fixed_key_len, 2, 0.8); + table_properties->fixed_key_len, 2, 0.8); options_.prefix_extractor = NewNoopTransform(); fprintf(stdout, "Sst file format: plain table\n"); } else { char error_msg_buffer[80]; snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1, - "Unsupported table magic number --- %lx)", table_magic_number); + "Unsupported table magic number --- %#" PRIx64, + table_magic_number); return Status::InvalidArgument(error_msg_buffer); } @@ -192,7 +196,8 @@ Status SstFileReader::ReadSequential(bool print_kv, return ret; } -Status SstFileReader::ReadTableProperties(TableProperties* table_properties) { +Status SstFileReader::ReadTableProperties( + std::shared_ptr* table_properties) { if (!table_reader_) { return init_result_; } @@ -335,18 +340,19 @@ int main(int argc, char** argv) { } } if (show_properties) { - rocksdb::TableProperties table_properties; + std::shared_ptr table_properties; st = reader.ReadTableProperties(&table_properties); if (!st.ok()) { fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str()); } else { fprintf(stdout, - "Table Properties:\n" - "------------------------------\n" - " %s", table_properties.ToString("\n ", ": ").c_str()); + "Table Properties:\n" + "------------------------------\n" + " %s", + table_properties->ToString("\n ", ": ").c_str()); fprintf(stdout, "# deleted keys: %zd\n", rocksdb::GetDeletedKeys( - table_properties.user_collected_properties)); + table_properties->user_collected_properties)); } } }