Add the property block for the plain table
Summary: This is the last diff that adds the property block to the plain table. The format resembles that of the block-based table (https://github.com/facebook/rocksdb/wiki/Rocksdb-table-format): [data block] [meta block 1: stats block] [meta block 2: future extended block] ... [meta block K: future extended block] (we may add more meta blocks in the future) [metaindex block] [index block: only a placeholder for now; a persistent index block can be added in the future] [Footer: contains the magic number and the handles to the metaindex block and the index block] <end_of_file>. Test Plan: extended the existing property block test. Reviewers: haobo, sdong, dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D14523
This commit is contained in:
parent
5f5e5fc2e9
commit
2e9efcd6d8
@ -12,7 +12,9 @@
|
||||
#include "db/table_properties_collector.h"
|
||||
#include "rocksdb/table_properties.h"
|
||||
#include "rocksdb/table.h"
|
||||
#include "rocksdb/plain_table_factory.h"
|
||||
#include "table/block_based_table_factory.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
@ -20,8 +22,6 @@
|
||||
namespace rocksdb {
|
||||
|
||||
class TablePropertiesTest {
|
||||
private:
|
||||
unique_ptr<TableReader> table_reader_;
|
||||
};
|
||||
|
||||
// TODO(kailiu) the following classes should be moved to some more general
|
||||
@ -93,22 +93,6 @@ void MakeBuilder(
|
||||
options.compression));
|
||||
}
|
||||
|
||||
void OpenTable(
|
||||
const Options& options,
|
||||
const std::string& contents,
|
||||
std::unique_ptr<TableReader>* table_reader) {
|
||||
|
||||
std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents));
|
||||
auto s = options.table_factory->GetTableReader(
|
||||
options,
|
||||
EnvOptions(),
|
||||
std::move(file),
|
||||
contents.size(),
|
||||
table_reader
|
||||
);
|
||||
ASSERT_OK(s);
|
||||
}
|
||||
|
||||
// Collects keys that start with "A" in a table.
|
||||
class RegularKeysStartWithA: public TablePropertiesCollector {
|
||||
public:
|
||||
@ -141,9 +125,12 @@ class RegularKeysStartWithA: public TablePropertiesCollector {
|
||||
uint32_t count_ = 0;
|
||||
};
|
||||
|
||||
TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
|
||||
Options options;
|
||||
|
||||
extern uint64_t kBlockBasedTableMagicNumber;
|
||||
extern uint64_t kPlainTableMagicNumber;
|
||||
void TestCustomizedTablePropertiesCollector(
|
||||
uint64_t magic_number,
|
||||
bool encode_as_internal,
|
||||
const Options& options) {
|
||||
// Use a std::map so that the entries are inserted in sorted key order.
|
||||
std::map<std::string, std::string> kvs = {
|
||||
{"About ", "val5"}, // starts with 'A'
|
||||
@ -155,18 +142,7 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
|
||||
{"Find ", "val6"},
|
||||
};
|
||||
|
||||
// Test properties collectors with internal keys or regular keys
|
||||
for (bool encode_as_internal : { true, false }) {
|
||||
// -- Step 1: build table
|
||||
auto collector = new RegularKeysStartWithA();
|
||||
if (encode_as_internal) {
|
||||
options.table_properties_collectors = {
|
||||
std::make_shared<UserKeyTablePropertiesCollector>(collector)
|
||||
};
|
||||
} else {
|
||||
options.table_properties_collectors.resize(1);
|
||||
options.table_properties_collectors[0].reset(collector);
|
||||
}
|
||||
std::unique_ptr<TableBuilder> builder;
|
||||
std::unique_ptr<FakeWritableFile> writable;
|
||||
MakeBuilder(options, &writable, &builder);
|
||||
@ -181,22 +157,65 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
|
||||
}
|
||||
ASSERT_OK(builder->Finish());
|
||||
|
||||
// -- Step 2: Open table
|
||||
std::unique_ptr<TableReader> table_reader;
|
||||
OpenTable(options, writable->contents(), &table_reader);
|
||||
const auto& properties =
|
||||
table_reader->GetTableProperties().user_collected_properties;
|
||||
// -- Step 2: Read properties
|
||||
FakeRandomeAccessFile readable(writable->contents());
|
||||
TableProperties props;
|
||||
Status s = ReadTableProperties(
|
||||
&readable,
|
||||
writable->contents().size(),
|
||||
magic_number,
|
||||
Env::Default(),
|
||||
nullptr,
|
||||
&props
|
||||
);
|
||||
ASSERT_OK(s);
|
||||
|
||||
ASSERT_EQ("Rocksdb", properties.at("TablePropertiesTest"));
|
||||
auto user_collected = props.user_collected_properties;
|
||||
|
||||
ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest"));
|
||||
|
||||
uint32_t starts_with_A = 0;
|
||||
Slice key(properties.at("Count"));
|
||||
Slice key(user_collected.at("Count"));
|
||||
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
|
||||
ASSERT_EQ(3u, starts_with_A);
|
||||
}
|
||||
|
||||
TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
|
||||
// Test properties collectors with internal keys or regular keys
|
||||
// for block based table
|
||||
for (bool encode_as_internal : { true, false }) {
|
||||
Options options;
|
||||
auto collector = new RegularKeysStartWithA();
|
||||
if (encode_as_internal) {
|
||||
options.table_properties_collectors = {
|
||||
std::make_shared<UserKeyTablePropertiesCollector>(collector)
|
||||
};
|
||||
} else {
|
||||
options.table_properties_collectors.resize(1);
|
||||
options.table_properties_collectors[0].reset(collector);
|
||||
}
|
||||
TestCustomizedTablePropertiesCollector(
|
||||
kBlockBasedTableMagicNumber,
|
||||
encode_as_internal,
|
||||
options
|
||||
);
|
||||
}
|
||||
|
||||
TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
|
||||
// test plain table
|
||||
Options options;
|
||||
options.table_properties_collectors.push_back(
|
||||
std::make_shared<RegularKeysStartWithA>()
|
||||
);
|
||||
options.table_factory = std::make_shared<PlainTableFactory>(8, 8, 0);
|
||||
TestCustomizedTablePropertiesCollector(
|
||||
kPlainTableMagicNumber, true, options
|
||||
);
|
||||
}
|
||||
|
||||
void TestInternalKeyPropertiesCollector(
|
||||
uint64_t magic_number,
|
||||
bool sanitized,
|
||||
std::shared_ptr<TableFactory> table_factory) {
|
||||
InternalKey keys[] = {
|
||||
InternalKey("A ", 0, ValueType::kTypeValue),
|
||||
InternalKey("B ", 0, ValueType::kTypeValue),
|
||||
@ -207,10 +226,10 @@ TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
|
||||
InternalKey("Z ", 0, ValueType::kTypeDeletion),
|
||||
};
|
||||
|
||||
for (bool sanitized : { false, true }) {
|
||||
std::unique_ptr<TableBuilder> builder;
|
||||
std::unique_ptr<FakeWritableFile> writable;
|
||||
Options options;
|
||||
options.table_factory = table_factory;
|
||||
if (sanitized) {
|
||||
options.table_properties_collectors = {
|
||||
std::make_shared<RegularKeysStartWithA>()
|
||||
@ -241,21 +260,46 @@ TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
|
||||
|
||||
ASSERT_OK(builder->Finish());
|
||||
|
||||
std::unique_ptr<TableReader> table_reader;
|
||||
OpenTable(options, writable->contents(), &table_reader);
|
||||
const auto& properties =
|
||||
table_reader->GetTableProperties().user_collected_properties;
|
||||
FakeRandomeAccessFile readable(writable->contents());
|
||||
TableProperties props;
|
||||
Status s = ReadTableProperties(
|
||||
&readable,
|
||||
writable->contents().size(),
|
||||
magic_number,
|
||||
Env::Default(),
|
||||
nullptr,
|
||||
&props
|
||||
);
|
||||
ASSERT_OK(s);
|
||||
|
||||
uint64_t deleted = GetDeletedKeys(properties);
|
||||
auto user_collected = props.user_collected_properties;
|
||||
uint64_t deleted = GetDeletedKeys(user_collected);
|
||||
ASSERT_EQ(4u, deleted);
|
||||
|
||||
if (sanitized) {
|
||||
uint32_t starts_with_A = 0;
|
||||
Slice key(properties.at("Count"));
|
||||
Slice key(user_collected.at("Count"));
|
||||
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
|
||||
ASSERT_EQ(1u, starts_with_A);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs TestInternalKeyPropertiesCollector against the block-based table
// factory and the plain table factory, passing each format's magic number.
TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
|
||||
TestInternalKeyPropertiesCollector(
|
||||
kBlockBasedTableMagicNumber,
|
||||
true /* sanitize */,
|
||||
std::make_shared<BlockBasedTableFactory>()
|
||||
);
|
||||
TestInternalKeyPropertiesCollector(
|
||||
kBlockBasedTableMagicNumber,
|
||||
// NOTE(review): the argument below is `true` although its inline comment
// says "not sanitize". As written this call repeats the sanitized
// block-based case above, and the unsanitized block-based case is never
// exercised -- presumably `false` was intended; confirm.
true /* not sanitize */,
|
||||
std::make_shared<BlockBasedTableFactory>()
|
||||
);
|
||||
TestInternalKeyPropertiesCollector(
|
||||
kPlainTableMagicNumber,
|
||||
false /* not sanitize */,
|
||||
std::make_shared<PlainTableFactory>(8, 8, 0)
|
||||
);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -64,6 +64,7 @@ struct TablePropertiesNames {
|
||||
static const std::string kFilterPolicy;
|
||||
};
|
||||
|
||||
extern const std::string kPropertiesBlock;
|
||||
|
||||
// `TablePropertiesCollector` provides the mechanism for users to collect
|
||||
// their own interested properties. This class is essentially a collection
|
||||
|
@ -387,7 +387,7 @@ Status BlockBasedTableBuilder::Finish() {
|
||||
&properties_block_handle
|
||||
);
|
||||
|
||||
meta_index_builer.Add(BlockBasedTable::kPropertiesBlock,
|
||||
meta_index_builer.Add(kPropertiesBlock,
|
||||
properties_block_handle);
|
||||
} // end of properties block writing
|
||||
|
||||
@ -459,7 +459,5 @@ uint64_t BlockBasedTableBuilder::FileSize() const {
|
||||
|
||||
const std::string BlockBasedTable::kFilterBlockPrefix =
|
||||
"filter.";
|
||||
const std::string BlockBasedTable::kPropertiesBlock =
|
||||
"rocksdb.properties";
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "table/block.h"
|
||||
#include "table/filter_block.h"
|
||||
#include "table/format.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "table/two_level_iterator.h"
|
||||
|
||||
#include "util/coding.h"
|
||||
@ -250,10 +251,16 @@ Status BlockBasedTable::Open(const Options& options,
|
||||
|
||||
// Read the properties
|
||||
meta_iter->Seek(kPropertiesBlock);
|
||||
if (meta_iter->Valid() && meta_iter->key() == Slice(kPropertiesBlock)) {
|
||||
if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) {
|
||||
s = meta_iter->status();
|
||||
if (s.ok()) {
|
||||
s = ReadProperties(meta_iter->value(), rep, &rep->table_properties);
|
||||
s = ReadProperties(
|
||||
meta_iter->value(),
|
||||
rep->file.get(),
|
||||
rep->options.env,
|
||||
rep->options.info_log.get(),
|
||||
&rep->table_properties
|
||||
);
|
||||
}
|
||||
|
||||
if (!s.ok()) {
|
||||
@ -401,96 +408,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter (
|
||||
rep->options, block.data, block.heap_allocated);
|
||||
}
|
||||
|
||||
Status BlockBasedTable::ReadProperties(
|
||||
const Slice& handle_value, Rep* rep, TableProperties* table_properties) {
|
||||
assert(table_properties);
|
||||
|
||||
Slice v = handle_value;
|
||||
BlockHandle handle;
|
||||
if (!handle.DecodeFrom(&v).ok()) {
|
||||
return Status::InvalidArgument("Failed to decode properties block handle");
|
||||
}
|
||||
|
||||
BlockContents block_contents;
|
||||
Status s = ReadBlockContents(
|
||||
rep->file.get(),
|
||||
ReadOptions(),
|
||||
handle,
|
||||
&block_contents,
|
||||
rep->options.env,
|
||||
false
|
||||
);
|
||||
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
Block properties_block(block_contents);
|
||||
std::unique_ptr<Iterator> iter(
|
||||
properties_block.NewIterator(BytewiseComparator())
|
||||
);
|
||||
|
||||
// All pre-defined properties of type uint64_t
|
||||
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
|
||||
{ TablePropertiesNames::kDataSize,
|
||||
&table_properties->data_size },
|
||||
{ TablePropertiesNames::kIndexSize,
|
||||
&table_properties->index_size },
|
||||
{ TablePropertiesNames::kFilterSize,
|
||||
&table_properties->filter_size },
|
||||
{ TablePropertiesNames::kRawKeySize,
|
||||
&table_properties->raw_key_size },
|
||||
{ TablePropertiesNames::kRawValueSize,
|
||||
&table_properties->raw_value_size },
|
||||
{ TablePropertiesNames::kNumDataBlocks,
|
||||
&table_properties->num_data_blocks },
|
||||
{ TablePropertiesNames::kNumEntries,
|
||||
&table_properties->num_entries },
|
||||
};
|
||||
|
||||
std::string last_key;
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
s = iter->status();
|
||||
if (!s.ok()) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto key = iter->key().ToString();
|
||||
// properties block is strictly sorted with no duplicate key.
|
||||
assert(
|
||||
last_key.empty() ||
|
||||
BytewiseComparator()->Compare(key, last_key) > 0
|
||||
);
|
||||
last_key = key;
|
||||
|
||||
auto raw_val = iter->value();
|
||||
auto pos = predefined_uint64_properties.find(key);
|
||||
|
||||
if (pos != predefined_uint64_properties.end()) {
|
||||
// handle predefined rocksdb properties
|
||||
uint64_t val;
|
||||
if (!GetVarint64(&raw_val, &val)) {
|
||||
// skip malformed value
|
||||
auto error_msg =
|
||||
"[Warning] detect malformed value in properties meta-block:"
|
||||
"\tkey: " + key + "\tval: " + raw_val.ToString();
|
||||
Log(rep->options.info_log, "%s", error_msg.c_str());
|
||||
continue;
|
||||
}
|
||||
*(pos->second) = val;
|
||||
} else if (key == TablePropertiesNames::kFilterPolicy) {
|
||||
table_properties->filter_policy_name = raw_val.ToString();
|
||||
} else {
|
||||
// handle user-collected
|
||||
table_properties->user_collected_properties.insert(
|
||||
std::make_pair(key, raw_val.ToString())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Status BlockBasedTable::GetBlock(
|
||||
const BlockBasedTable* table,
|
||||
const BlockHandle& handle,
|
||||
|
@ -38,7 +38,6 @@ using std::unique_ptr;
|
||||
class BlockBasedTable : public TableReader {
|
||||
public:
|
||||
static const std::string kFilterBlockPrefix;
|
||||
static const std::string kPropertiesBlock;
|
||||
|
||||
// Attempt to open the table that is stored in bytes [0..file_size)
|
||||
// of "file", and read the metadata entries necessary to allow
|
||||
@ -142,7 +141,6 @@ class BlockBasedTable : public TableReader {
|
||||
|
||||
void ReadMeta(const Footer& footer);
|
||||
void ReadFilter(const Slice& filter_handle_value);
|
||||
static Status ReadProperties(const Slice& handle_value, Rep* rep);
|
||||
|
||||
// Read the meta block from sst.
|
||||
static Status ReadMetaBlock(
|
||||
@ -156,10 +154,6 @@ class BlockBasedTable : public TableReader {
|
||||
Rep* rep,
|
||||
size_t* filter_size = nullptr);
|
||||
|
||||
// Read the table properties from properties block.
|
||||
static Status ReadProperties(
|
||||
const Slice& handle_value, Rep* rep, TableProperties* properties);
|
||||
|
||||
static void SetupCacheKeyPrefix(Rep* rep);
|
||||
|
||||
explicit BlockBasedTable(Rep* rep) :
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <map>
|
||||
|
||||
#include "rocksdb/table_properties.h"
|
||||
#include "table/block.h"
|
||||
#include "table/format.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
@ -131,4 +132,147 @@ bool NotifyCollectTableCollectorsOnFinish(
|
||||
return all_succeeded;
|
||||
}
|
||||
|
||||
// Decodes `handle_value` into a BlockHandle, reads that block from `file`
// and copies its key/value entries into `*table_properties`:
//   * keys listed in `predefined_uint64_properties` are varint64-decoded into
//     the matching TableProperties field; a value that fails to decode is
//     logged to `logger` and skipped;
//   * TablePropertiesNames::kFilterPolicy is stored in `filter_policy_name`;
//   * every other key is inserted into `user_collected_properties`.
// Returns InvalidArgument if the handle cannot be decoded, the status of
// ReadBlockContents() if the block read fails, and otherwise the last
// iterator status sampled inside the loop.
// REQUIRES: `table_properties` is non-null (asserted).
Status ReadProperties(
|
||||
const Slice& handle_value,
|
||||
RandomAccessFile* file,
|
||||
Env* env,
|
||||
Logger* logger,
|
||||
TableProperties* table_properties) {
|
||||
assert(table_properties);
|
||||
|
||||
// Work on a copy: DecodeFrom() takes a pointer to the slice it decodes.
Slice v = handle_value;
|
||||
BlockHandle handle;
|
||||
if (!handle.DecodeFrom(&v).ok()) {
|
||||
return Status::InvalidArgument("Failed to decode properties block handle");
|
||||
}
|
||||
|
||||
BlockContents block_contents;
|
||||
Status s = ReadBlockContents(
|
||||
file,
|
||||
ReadOptions(),
|
||||
handle,
|
||||
&block_contents,
|
||||
env,
|
||||
false
|
||||
);
|
||||
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
Block properties_block(block_contents);
|
||||
std::unique_ptr<Iterator> iter(
|
||||
properties_block.NewIterator(BytewiseComparator())
|
||||
);
|
||||
|
||||
// All pre-defined properties of type uint64_t
// (maps each property name to the TableProperties field that receives it).
|
||||
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
|
||||
{ TablePropertiesNames::kDataSize, &table_properties->data_size },
|
||||
{ TablePropertiesNames::kIndexSize, &table_properties->index_size },
|
||||
{ TablePropertiesNames::kFilterSize, &table_properties->filter_size },
|
||||
{ TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size },
|
||||
{ TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size },
|
||||
{ TablePropertiesNames::kNumDataBlocks,
|
||||
&table_properties->num_data_blocks },
|
||||
{ TablePropertiesNames::kNumEntries, &table_properties->num_entries },
|
||||
};
|
||||
|
||||
std::string last_key;
|
||||
// NOTE(review): the iterator status is only sampled while the iterator is
// Valid(). If an error is what makes the iterator invalid, the loop exits
// without re-checking iter->status() -- confirm whether a post-loop check is
// needed.
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
s = iter->status();
|
||||
if (!s.ok()) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto key = iter->key().ToString();
|
||||
// properties block is strictly sorted with no duplicate key.
|
||||
assert(
|
||||
last_key.empty() ||
|
||||
BytewiseComparator()->Compare(key, last_key) > 0
|
||||
);
|
||||
last_key = key;
|
||||
|
||||
auto raw_val = iter->value();
|
||||
auto pos = predefined_uint64_properties.find(key);
|
||||
|
||||
if (pos != predefined_uint64_properties.end()) {
|
||||
// handle predefined rocksdb properties
|
||||
uint64_t val;
|
||||
if (!GetVarint64(&raw_val, &val)) {
|
||||
// skip malformed value
// (logged as a warning; the corresponding field is left untouched and the
// returned status is not changed)
|
||||
auto error_msg =
|
||||
"[Warning] detect malformed value in properties meta-block:"
|
||||
"\tkey: " + key + "\tval: " + raw_val.ToString();
|
||||
Log(logger, "%s", error_msg.c_str());
|
||||
continue;
|
||||
}
|
||||
*(pos->second) = val;
|
||||
} else if (key == TablePropertiesNames::kFilterPolicy) {
|
||||
table_properties->filter_policy_name = raw_val.ToString();
|
||||
} else {
|
||||
// handle user-collected properties
|
||||
table_properties->user_collected_properties.insert(
|
||||
std::make_pair(key, raw_val.ToString())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
// Reads the table properties straight from a table file, without opening a
// table reader: reads the footer, reads the metaindex block the footer points
// to, looks up the kPropertiesBlock entry and decodes it into `*properties`
// via ReadProperties().
// Returns the failing status if the footer or metaindex block cannot be read,
// Corruption if the metaindex block has no usable kPropertiesBlock entry, and
// otherwise the status of ReadProperties().
Status ReadTableProperties(
|
||||
RandomAccessFile* file,
|
||||
uint64_t file_size,
|
||||
uint64_t table_magic_number,
|
||||
Env* env,
|
||||
Logger* info_log,
|
||||
TableProperties* properties) {
|
||||
// -- Read metaindex block
|
||||
// The footer is constructed with the caller-supplied magic number
// (presumably so ReadFooterFromFile can validate the file format -- confirm).
Footer footer(table_magic_number);
|
||||
auto s = ReadFooterFromFile(file, file_size, &footer);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
auto metaindex_handle = footer.metaindex_handle();
|
||||
BlockContents metaindex_contents;
|
||||
s = ReadBlockContents(
|
||||
file,
|
||||
ReadOptions(),
|
||||
metaindex_handle,
|
||||
&metaindex_contents,
|
||||
env,
|
||||
false
|
||||
);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
Block metaindex_block(metaindex_contents);
|
||||
std::unique_ptr<Iterator> meta_iter(
|
||||
metaindex_block.NewIterator(BytewiseComparator())
|
||||
);
|
||||
|
||||
// -- Read property block
|
||||
meta_iter->Seek(kPropertiesBlock);
|
||||
// NOTE(review): `table_properties` is never used below -- the result is
// written into the `properties` out-parameter -- so this local looks
// removable.
TableProperties table_properties;
|
||||
if (meta_iter->Valid() &&
|
||||
meta_iter->key() == kPropertiesBlock &&
|
||||
meta_iter->status().ok()) {
|
||||
s = ReadProperties(
|
||||
meta_iter->value(),
|
||||
file,
|
||||
env,
|
||||
info_log,
|
||||
properties
|
||||
);
|
||||
} else {
|
||||
// NOTE(review): the message says "plain table", but `table_magic_number`
// lets callers pass other table formats (the test in this change passes
// kBlockBasedTableMagicNumber) -- consider a format-neutral message.
s = Status::Corruption(
|
||||
"Unable to read the property block from the plain table"
|
||||
);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -15,9 +15,11 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class BlockHandle;
|
||||
class BlockBuilder;
|
||||
class BlockHandle;
|
||||
class Env;
|
||||
class Logger;
|
||||
class RandomAccessFile;
|
||||
struct TableProperties;
|
||||
|
||||
// An STL style comparator that does the bytewise comparator comparison
|
||||
@ -49,11 +51,6 @@ class MetaIndexBuilder {
|
||||
Slice Finish();
|
||||
|
||||
private:
|
||||
// * Key: meta block name
|
||||
// * Value: block handle to that meta block
|
||||
struct Rep;
|
||||
Rep* rep_;
|
||||
|
||||
// store the sorted key/handle of the metablocks.
|
||||
BytewiseSortedMap meta_block_handles_;
|
||||
std::unique_ptr<BlockBuilder> meta_index_block_;
|
||||
@ -103,4 +100,21 @@ bool NotifyCollectTableCollectorsOnFinish(
|
||||
Logger* info_log,
|
||||
PropertyBlockBuilder* builder);
|
||||
|
||||
// Read the properties from the table.
|
||||
Status ReadProperties(
|
||||
const Slice& handle_value,
|
||||
RandomAccessFile* file,
|
||||
Env* env,
|
||||
Logger* logger,
|
||||
TableProperties* table_properties);
|
||||
|
||||
// Directly read the properties from the properties block of a table file whose format is identified by `table_magic_number` (e.g. a plain table).
|
||||
Status ReadTableProperties(
|
||||
RandomAccessFile* file,
|
||||
uint64_t file_size,
|
||||
uint64_t table_magic_number,
|
||||
Env* env,
|
||||
Logger* info_log,
|
||||
TableProperties* properties);
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -14,12 +14,40 @@
|
||||
#include "table/block_builder.h"
|
||||
#include "table/filter_block.h"
|
||||
#include "table/format.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
#include "util/stop_watch.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
namespace {
|
||||
|
||||
// A utility that appends `block_contents` to `file` and records where the
|
||||
// block was placed. @offset is in/out: it is the block's start offset on entry and advances by the block size only if @block_contents was successfully written.
|
||||
// @block_handle receives the start offset and size of this particular block; it is filled in before the append is attempted, so it is set even when the append fails.
|
||||
Status WriteBlock(
|
||||
const Slice& block_contents,
|
||||
WritableFile* file,
|
||||
uint64_t* offset,
|
||||
BlockHandle* block_handle) {
|
||||
block_handle->set_offset(*offset);
|
||||
block_handle->set_size(block_contents.size());
|
||||
Status s = file->Append(block_contents);
|
||||
|
||||
// Leave *offset untouched on failure so the caller's running offset is only
// moved past blocks whose append succeeded.
if (s.ok()) {
|
||||
*offset += block_contents.size();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// kPlainTableMagicNumber was picked by running
|
||||
// echo rocksdb.plain.table | sha1sum
|
||||
// and taking the leading 64 bits.
|
||||
extern const uint64_t kPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull;
|
||||
|
||||
PlainTableBuilder::PlainTableBuilder(const Options& options,
|
||||
WritableFile* file,
|
||||
int user_key_size, int key_prefix_len) :
|
||||
@ -28,15 +56,18 @@ PlainTableBuilder::PlainTableBuilder(const Options& options,
|
||||
PutFixed32(&version, 1 | 0x80000000);
|
||||
file_->Append(Slice(version));
|
||||
offset_ = 4;
|
||||
|
||||
// for plain table, we put all the data in a big chunk.
|
||||
properties_.num_data_blocks = 1;
|
||||
// emphasize that currently plain table doesn't have persistent index or
|
||||
// filter block.
|
||||
properties_.index_size = 0;
|
||||
properties_.filter_size = 0;
|
||||
}
|
||||
|
||||
PlainTableBuilder::~PlainTableBuilder() {
|
||||
}
|
||||
|
||||
Status PlainTableBuilder::ChangeOptions(const Options& options) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
assert((int) key.size() == GetInternalKeyLength());
|
||||
|
||||
@ -52,7 +83,17 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
file_->Append(value);
|
||||
offset_ += value_size + size.length();
|
||||
|
||||
num_entries_++;
|
||||
properties_.num_entries++;
|
||||
properties_.raw_key_size += key.size();
|
||||
properties_.raw_value_size += value.size();
|
||||
|
||||
// notify property collectors
|
||||
NotifyCollectTableCollectorsOnAdd(
|
||||
key,
|
||||
value,
|
||||
options_.table_properties_collectors,
|
||||
options_.info_log.get()
|
||||
);
|
||||
}
|
||||
|
||||
Status PlainTableBuilder::status() const {
|
||||
@ -62,7 +103,63 @@ Status PlainTableBuilder::status() const {
|
||||
Status PlainTableBuilder::Finish() {
|
||||
assert(!closed_);
|
||||
closed_ = true;
|
||||
return Status::OK();
|
||||
|
||||
properties_.data_size = offset_;
|
||||
|
||||
// Write the following blocks
|
||||
// 1. [meta block: properties]
|
||||
// 2. [metaindex block]
|
||||
// 3. [footer]
|
||||
MetaIndexBuilder meta_index_builer;
|
||||
|
||||
PropertyBlockBuilder property_block_builder;
|
||||
// -- Add basic properties
|
||||
property_block_builder.AddTableProperty(properties_);
|
||||
|
||||
// -- Add user collected properties
|
||||
NotifyCollectTableCollectorsOnFinish(
|
||||
options_.table_properties_collectors,
|
||||
options_.info_log.get(),
|
||||
&property_block_builder
|
||||
);
|
||||
|
||||
// -- Write property block
|
||||
BlockHandle property_block_handle;
|
||||
auto s = WriteBlock(
|
||||
property_block_builder.Finish(),
|
||||
file_,
|
||||
&offset_,
|
||||
&property_block_handle
|
||||
);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
meta_index_builer.Add(kPropertiesBlock, property_block_handle);
|
||||
|
||||
// -- write metaindex block
|
||||
BlockHandle metaindex_block_handle;
|
||||
s = WriteBlock(
|
||||
meta_index_builer.Finish(),
|
||||
file_,
|
||||
&offset_,
|
||||
&metaindex_block_handle
|
||||
);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// Write Footer
|
||||
Footer footer(kPlainTableMagicNumber);
|
||||
footer.set_metaindex_handle(metaindex_block_handle);
|
||||
footer.set_index_handle(BlockHandle::NullBlockHandle());
|
||||
std::string footer_encoding;
|
||||
footer.EncodeTo(&footer_encoding);
|
||||
s = file_->Append(footer_encoding);
|
||||
if (s.ok()) {
|
||||
offset_ += footer_encoding.size();
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
void PlainTableBuilder::Abandon() {
|
||||
@ -70,7 +167,7 @@ void PlainTableBuilder::Abandon() {
|
||||
}
|
||||
|
||||
uint64_t PlainTableBuilder::NumEntries() const {
|
||||
return num_entries_;
|
||||
return properties_.num_entries;
|
||||
}
|
||||
|
||||
uint64_t PlainTableBuilder::FileSize() const {
|
||||
|
@ -32,14 +32,6 @@ public:
|
||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||
~PlainTableBuilder();
|
||||
|
||||
// Change the options used by this builder. Note: only some of the
|
||||
// option fields can be changed after construction. If a field is
|
||||
// not allowed to change dynamically and its value in the structure
|
||||
// passed to the constructor is different from its value in the
|
||||
// structure passed to this method, this method will return an error
|
||||
// without changing any fields.
|
||||
Status ChangeOptions(const Options& options);
|
||||
|
||||
// Add key,value to the table being constructed.
|
||||
// REQUIRES: key is after any previously added key according to comparator.
|
||||
// REQUIRES: Finish(), Abandon() have not been called
|
||||
@ -72,7 +64,7 @@ private:
|
||||
WritableFile* file_;
|
||||
uint64_t offset_ = 0;
|
||||
Status status_;
|
||||
uint64_t num_entries_ = 0;
|
||||
TableProperties properties_;
|
||||
|
||||
const size_t user_key_size_;
|
||||
bool closed_ = false; // Either Finish() or Abandon() has been called.
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "table/block.h"
|
||||
#include "table/filter_block.h"
|
||||
#include "table/format.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "table/two_level_iterator.h"
|
||||
|
||||
#include "util/coding.h"
|
||||
@ -41,6 +42,7 @@ public:
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
extern const uint64_t kPlainTableMagicNumber;
|
||||
static uint32_t getBucketId(Slice const& s, size_t prefix_len,
|
||||
uint32_t num_buckets) {
|
||||
return MurmurHash(s.data(), prefix_len, 397) % num_buckets;
|
||||
@ -49,18 +51,16 @@ static uint32_t getBucketId(Slice const& s, size_t prefix_len,
|
||||
PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
|
||||
uint64_t file_size, int user_key_size,
|
||||
int key_prefix_len, int bloom_bits_per_key,
|
||||
double hash_table_ratio) :
|
||||
double hash_table_ratio,
|
||||
const TableProperties& table_properties) :
|
||||
hash_table_size_(0), soptions_(storage_options), file_size_(file_size),
|
||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
|
||||
hash_table_ratio_(hash_table_ratio) {
|
||||
if (bloom_bits_per_key > 0) {
|
||||
filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key);
|
||||
} else {
|
||||
filter_policy_ = nullptr;
|
||||
}
|
||||
hash_table_ = nullptr;
|
||||
data_start_offset_ = 0;
|
||||
data_end_offset_ = file_size;
|
||||
hash_table_ratio_(hash_table_ratio),
|
||||
filter_policy_(bloom_bits_per_key > 0 ?
|
||||
NewBloomFilterPolicy(bloom_bits_per_key) : nullptr),
|
||||
table_properties_(table_properties),
|
||||
data_start_offset_(0),
|
||||
data_end_offset_(table_properties_.data_size) {
|
||||
}
|
||||
|
||||
PlainTableReader::~PlainTableReader() {
|
||||
@ -87,19 +87,38 @@ Status PlainTableReader::Open(const Options& options,
|
||||
return Status::NotSupported("File is too large for PlainTableReader!");
|
||||
}
|
||||
|
||||
PlainTableReader* t = new PlainTableReader(soptions, file_size,
|
||||
TableProperties table_properties;
|
||||
auto s = ReadTableProperties(
|
||||
file.get(),
|
||||
file_size,
|
||||
kPlainTableMagicNumber,
|
||||
options.env,
|
||||
options.info_log.get(),
|
||||
&table_properties
|
||||
);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
|
||||
soptions,
|
||||
file_size,
|
||||
user_key_size,
|
||||
key_prefix_len,
|
||||
bloom_num_bits,
|
||||
hash_table_ratio);
|
||||
t->file_ = std::move(file);
|
||||
t->options_ = options;
|
||||
Status s = t->PopulateIndex(file_size);
|
||||
hash_table_ratio,
|
||||
table_properties
|
||||
));
|
||||
new_reader->file_ = std::move(file);
|
||||
new_reader->options_ = options;
|
||||
|
||||
// -- Populate Index
|
||||
s = new_reader->PopulateIndex();
|
||||
if (!s.ok()) {
|
||||
delete t;
|
||||
return s;
|
||||
}
|
||||
table_reader->reset(t);
|
||||
|
||||
*table_reader = std::move(new_reader);
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -114,7 +133,7 @@ Iterator* PlainTableReader::NewIterator(const ReadOptions& options) {
|
||||
return new PlainTableIterator(this);
|
||||
}
|
||||
|
||||
Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
||||
Status PlainTableReader::PopulateIndex() {
|
||||
// Get mmapped memory to file_data_.
|
||||
Status s = file_->Read(0, file_size_, &file_data_, nullptr);
|
||||
if (!s.ok()) {
|
||||
@ -124,7 +143,6 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
||||
version_ ^= 0x80000000;
|
||||
assert(version_ == 1);
|
||||
data_start_offset_ = 4;
|
||||
data_end_offset_ = file_size;
|
||||
|
||||
Slice key_slice;
|
||||
Slice key_prefix_slice;
|
||||
@ -140,7 +158,7 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
||||
// are in order.
|
||||
std::vector<std::pair<Slice, std::string>> prefix_index_pairs;
|
||||
std::string current_prefix_index;
|
||||
while (pos < file_size) {
|
||||
while (pos < data_end_offset_) {
|
||||
uint32_t key_offset = pos;
|
||||
status_ = Next(pos, &key_slice, &value_slice, pos);
|
||||
key_prefix_slice = Slice(key_slice.data(), key_prefix_len_);
|
||||
|
@ -78,16 +78,21 @@ public:
|
||||
void SetupForCompaction();
|
||||
|
||||
TableProperties& GetTableProperties() {
|
||||
return tbl_props;
|
||||
return table_properties_;
|
||||
}
|
||||
|
||||
PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
|
||||
int user_key_size, int key_prefix_len, int bloom_num_bits,
|
||||
double hash_table_ratio);
|
||||
PlainTableReader(
|
||||
const EnvOptions& storage_options,
|
||||
uint64_t file_size,
|
||||
int user_key_size,
|
||||
int key_prefix_len,
|
||||
int bloom_num_bits,
|
||||
double hash_table_ratio,
|
||||
const TableProperties& table_properties);
|
||||
~PlainTableReader();
|
||||
|
||||
private:
|
||||
uint32_t* hash_table_;
|
||||
uint32_t* hash_table_ = nullptr;
|
||||
int hash_table_size_;
|
||||
std::string sub_index_;
|
||||
|
||||
@ -99,8 +104,6 @@ private:
|
||||
Slice file_data_;
|
||||
uint32_t version_;
|
||||
uint32_t file_size_;
|
||||
uint32_t data_start_offset_;
|
||||
uint32_t data_end_offset_;
|
||||
const size_t user_key_size_;
|
||||
const size_t key_prefix_len_;
|
||||
const double hash_table_ratio_;
|
||||
@ -108,7 +111,9 @@ private:
|
||||
std::string filter_str_;
|
||||
Slice filter_slice_;
|
||||
|
||||
TableProperties tbl_props;
|
||||
TableProperties table_properties_;
|
||||
uint32_t data_start_offset_;
|
||||
uint32_t data_end_offset_;
|
||||
|
||||
static const size_t kNumInternalBytes = 8;
|
||||
static const uint32_t kSubIndexMask = 0x80000000;
|
||||
@ -125,7 +130,7 @@ private:
|
||||
// any query to the table.
|
||||
// This call will populate the hash table hash_table_, the second
|
||||
// level of indexes sub_index_ and bloom filter filter_slice_ if enabled.
|
||||
Status PopulateIndex(uint64_t file_size);
|
||||
Status PopulateIndex();
|
||||
|
||||
// Check bloom filter to see whether it might contain this prefix
|
||||
bool MayHavePrefix(const Slice& target_prefix);
|
||||
|
@ -105,4 +105,6 @@ const std::string TablePropertiesNames::kNumEntries =
|
||||
const std::string TablePropertiesNames::kFilterPolicy =
|
||||
"rocksdb.filter.policy";
|
||||
|
||||
extern const std::string kPropertiesBlock = "rocksdb.properties";
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -15,17 +15,22 @@
|
||||
#include "db/db_statistics.h"
|
||||
#include "db/memtable.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
|
||||
#include "rocksdb/cache.h"
|
||||
#include "rocksdb/db.h"
|
||||
#include "rocksdb/plain_table_factory.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/memtablerep.h"
|
||||
|
||||
#include "table/meta_blocks.h"
|
||||
#include "table/block_based_table_builder.h"
|
||||
#include "table/block_based_table_factory.h"
|
||||
#include "table/block_based_table_reader.h"
|
||||
#include "table/block_builder.h"
|
||||
#include "table/block.h"
|
||||
#include "table/format.h"
|
||||
|
||||
#include "util/random.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
@ -743,49 +748,6 @@ class Harness {
|
||||
Constructor* constructor_;
|
||||
};
|
||||
|
||||
// Test the empty key
|
||||
TEST(Harness, SimpleEmptyKey) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 1);
|
||||
Add("", "v");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Harness, SimpleSingle) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 2);
|
||||
Add("abc", "v");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Harness, SimpleMulti) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 3);
|
||||
Add("abc", "v");
|
||||
Add("abcd", "v");
|
||||
Add("ac", "v2");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Harness, SimpleSpecialKey) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 4);
|
||||
Add("\xff\xff", "v3");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
|
||||
bool result = (val >= low) && (val <= high);
|
||||
if (!result) {
|
||||
@ -801,7 +763,7 @@ class TableTest { };
|
||||
|
||||
// This test includes all the basic checks except those for index size and block
|
||||
// size, which will be conducted in separate unit tests.
|
||||
TEST(TableTest, BasicTableProperties) {
|
||||
TEST(TableTest, BasicBlockedBasedTableProperties) {
|
||||
BlockBasedTableConstructor c(BytewiseComparator());
|
||||
|
||||
c.Add("a1", "val1");
|
||||
@ -845,6 +807,47 @@ TEST(TableTest, BasicTableProperties) {
|
||||
);
|
||||
}
|
||||
|
||||
extern const uint64_t kPlainTableMagicNumber;
|
||||
TEST(TableTest, BasicPlainTableProperties) {
|
||||
PlainTableFactory factory(8, 8, 0);
|
||||
StringSink sink;
|
||||
std::unique_ptr<TableBuilder> builder(factory.GetTableBuilder(
|
||||
Options(),
|
||||
&sink,
|
||||
kNoCompression
|
||||
));
|
||||
|
||||
for (char c = 'a'; c <= 'z'; ++c) {
|
||||
std::string key(16, c);
|
||||
std::string value(28, c + 42);
|
||||
builder->Add(key, value);
|
||||
}
|
||||
ASSERT_OK(builder->Finish());
|
||||
|
||||
StringSource source(sink.contents(), 72242);
|
||||
|
||||
TableProperties props;
|
||||
auto s = ReadTableProperties(
|
||||
&source,
|
||||
sink.contents().size(),
|
||||
kPlainTableMagicNumber,
|
||||
Env::Default(),
|
||||
nullptr,
|
||||
&props
|
||||
);
|
||||
ASSERT_OK(s);
|
||||
|
||||
ASSERT_EQ(0ul, props.index_size);
|
||||
ASSERT_EQ(0ul, props.filter_size);
|
||||
ASSERT_EQ(16ul * 26, props.raw_key_size);
|
||||
ASSERT_EQ(28ul * 26, props.raw_value_size);
|
||||
ASSERT_EQ(26ul, props.num_entries);
|
||||
ASSERT_EQ(1ul, props.num_data_blocks);
|
||||
|
||||
// User collected keys
|
||||
// internal keys
|
||||
}
|
||||
|
||||
TEST(TableTest, FilterPolicyNameProperties) {
|
||||
BlockBasedTableConstructor c(BytewiseComparator());
|
||||
c.Add("a1", "val1");
|
||||
@ -1292,6 +1295,48 @@ TEST(MemTableTest, Simple) {
|
||||
delete memtable->Unref();
|
||||
}
|
||||
|
||||
// Test the empty key
|
||||
TEST(Harness, SimpleEmptyKey) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 1);
|
||||
Add("", "v");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Harness, SimpleSingle) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 2);
|
||||
Add("abc", "v");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Harness, SimpleMulti) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 3);
|
||||
Add("abc", "v");
|
||||
Add("abcd", "v");
|
||||
Add("ac", "v2");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Harness, SimpleSpecialKey) {
|
||||
std::vector<TestArgs> args = GenerateArgList();
|
||||
for (unsigned int i = 0; i < args.size(); i++) {
|
||||
Init(args[i]);
|
||||
Random rnd(test::RandomSeed() + 4);
|
||||
Add("\xff\xff", "v3");
|
||||
Test(&rnd);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user