TablePropertiesCollectorFactory

Summary:
This diff addresses task #4296714 and rethinks how users provide us with TablePropertiesCollectors as part of Options.

Here's description of task #4296714:
       I'm debugging #4295529 and noticed that our count of user properties kDeletedKeys is wrong. We're sharing one single InternalKeyPropertiesCollector with all Table Builders. In LOG Files, we're outputting number of kDeletedKeys as connected with a single table, while it's actually the total count of deleted keys since creation of the DB.

       For example, this table has 3155 entries and 1391828 deleted keys.

The problem with current approach that we call methods on a single TablePropertiesCollector for all the tables we create. Even worse, we could do it from multiple threads at the same time and TablePropertiesCollector has no way of knowing which table we're calling it for.

Good part: Looks like nobody inside Facebook is using Options::table_properties_collectors. This means we should be able to painfully change the API.

In this change, I introduce TablePropertiesCollectorFactory. For every table we create, we call `CreateTablePropertiesCollector`, which creates a TablePropertiesCollector for a single table. We then use it sequentially from a single thread, which means it doesn't have to be thread-safe.

Test Plan:
Added a test in table_properties_collector_test that fails on master (build two tables, assert that kDeletedKeys count is correct for the second one).
Also, all other tests

Reviewers: sdong, dhruba, haobo, kailiu

Reviewed By: kailiu

CC: leveldb

Differential Revision: https://reviews.facebook.net/D18579
This commit is contained in:
Igor Canadi 2014-05-13 12:30:55 -07:00
parent 2082a7d745
commit 26f5dd9a5a
14 changed files with 177 additions and 115 deletions

View File

@ -1,5 +1,10 @@
# Rocksdb Change Log
## Unreleased (3.1.0)
### Public API changes
* Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories
## 3.0.0 (05/05/2014)
### Public API changes

View File

@ -104,14 +104,17 @@ ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp,
// All user defined properties collectors will be wrapped by
// UserKeyTablePropertiesCollector since for them they only have the
// knowledge of the user keys; internal keys are invisible to them.
auto& collectors = result.table_properties_collectors;
for (size_t i = 0; i < result.table_properties_collectors.size(); ++i) {
assert(collectors[i]);
collectors[i] =
std::make_shared<UserKeyTablePropertiesCollector>(collectors[i]);
auto& collector_factories = result.table_properties_collector_factories;
for (size_t i = 0; i < result.table_properties_collector_factories.size();
++i) {
assert(collector_factories[i]);
collector_factories[i] =
std::make_shared<UserKeyTablePropertiesCollectorFactory>(
collector_factories[i]);
}
// Add collector to collect internal key statistics
collectors.push_back(std::make_shared<InternalKeyPropertiesCollector>());
collector_factories.push_back(
std::make_shared<InternalKeyPropertiesCollectorFactory>());
return result;
}

View File

@ -36,6 +36,18 @@ class InternalKeyPropertiesCollector : public TablePropertiesCollector {
uint64_t deleted_keys_ = 0;
};
class InternalKeyPropertiesCollectorFactory
: public TablePropertiesCollectorFactory {
public:
virtual TablePropertiesCollector* CreateTablePropertiesCollector() {
return new InternalKeyPropertiesCollector();
}
virtual const char* Name() const override {
return "InternalKeyPropertiesCollectorFactory";
}
};
// When rocksdb creates a new table, it will encode all "user keys" into
// "internal keys", which contains meta information of a given entry.
//
@ -43,19 +55,11 @@ class InternalKeyPropertiesCollector : public TablePropertiesCollector {
// invoked.
class UserKeyTablePropertiesCollector : public TablePropertiesCollector {
public:
explicit UserKeyTablePropertiesCollector(
TablePropertiesCollector* collector) :
UserKeyTablePropertiesCollector(
std::shared_ptr<TablePropertiesCollector>(collector)
) {
}
// transfer of ownership
explicit UserKeyTablePropertiesCollector(TablePropertiesCollector* collector)
: collector_(collector) {}
explicit UserKeyTablePropertiesCollector(
std::shared_ptr<TablePropertiesCollector> collector) :
collector_(collector) {
}
virtual ~UserKeyTablePropertiesCollector() { }
virtual ~UserKeyTablePropertiesCollector() {}
virtual Status Add(const Slice& key, const Slice& value) override;
@ -66,7 +70,26 @@ class UserKeyTablePropertiesCollector : public TablePropertiesCollector {
UserCollectedProperties GetReadableProperties() const override;
protected:
std::shared_ptr<TablePropertiesCollector> collector_;
std::unique_ptr<TablePropertiesCollector> collector_;
};
class UserKeyTablePropertiesCollectorFactory
: public TablePropertiesCollectorFactory {
public:
explicit UserKeyTablePropertiesCollectorFactory(
std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory)
: user_collector_factory_(user_collector_factory) {}
virtual TablePropertiesCollector* CreateTablePropertiesCollector() {
return new UserKeyTablePropertiesCollector(
user_collector_factory_->CreateTablePropertiesCollector());
}
virtual const char* Name() const override {
return user_collector_factory_->Name();
}
private:
std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory_;
};
} // namespace rocksdb

View File

@ -121,11 +121,18 @@ class RegularKeysStartWithA: public TablePropertiesCollector {
return UserCollectedProperties{};
}
private:
uint32_t count_ = 0;
};
class RegularKeysStartWithAFactory : public TablePropertiesCollectorFactory {
public:
virtual TablePropertiesCollector* CreateTablePropertiesCollector() {
return new RegularKeysStartWithA();
}
const char* Name() const { return "RegularKeysStartWithA"; }
};
extern uint64_t kBlockBasedTableMagicNumber;
extern uint64_t kPlainTableMagicNumber;
namespace {
@ -188,14 +195,14 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
// for block based table
for (bool encode_as_internal : { true, false }) {
Options options;
auto collector = new RegularKeysStartWithA();
std::shared_ptr<TablePropertiesCollectorFactory> collector_factory(
new RegularKeysStartWithAFactory());
if (encode_as_internal) {
options.table_properties_collectors = {
std::make_shared<UserKeyTablePropertiesCollector>(collector)
};
options.table_properties_collector_factories.emplace_back(
new UserKeyTablePropertiesCollectorFactory(collector_factory));
} else {
options.table_properties_collectors.resize(1);
options.table_properties_collectors[0].reset(collector);
options.table_properties_collector_factories.resize(1);
options.table_properties_collector_factories[0] = collector_factory;
}
test::PlainInternalKeyComparator ikc(options.comparator);
TestCustomizedTablePropertiesCollector(kBlockBasedTableMagicNumber,
@ -204,9 +211,8 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
// test plain table
Options options;
options.table_properties_collectors.push_back(
std::make_shared<RegularKeysStartWithA>()
);
options.table_properties_collector_factories.emplace_back(
new RegularKeysStartWithAFactory());
options.table_factory = std::make_shared<PlainTableFactory>(8, 8, 0);
test::PlainInternalKeyComparator ikc(options.comparator);
TestCustomizedTablePropertiesCollector(kPlainTableMagicNumber, true, options,
@ -235,9 +241,8 @@ void TestInternalKeyPropertiesCollector(
options.table_factory = table_factory;
if (sanitized) {
options.table_properties_collectors = {
std::make_shared<RegularKeysStartWithA>()
};
options.table_properties_collector_factories.emplace_back(
new RegularKeysStartWithAFactory());
// with sanitization, even regular properties collector will be able to
// handle internal keys.
auto comparator = options.comparator;
@ -249,40 +254,36 @@ void TestInternalKeyPropertiesCollector(
options);
options.comparator = comparator;
} else {
options.table_properties_collectors = {
std::make_shared<InternalKeyPropertiesCollector>()
};
options.table_properties_collector_factories = {
std::make_shared<InternalKeyPropertiesCollectorFactory>()};
}
MakeBuilder(options, pikc, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}
for (int iter = 0; iter < 2; ++iter) {
MakeBuilder(options, pikc, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}
ASSERT_OK(builder->Finish());
ASSERT_OK(builder->Finish());
FakeRandomeAccessFile readable(writable->contents());
TableProperties* props;
Status s = ReadTableProperties(
&readable,
writable->contents().size(),
magic_number,
Env::Default(),
nullptr,
&props
);
ASSERT_OK(s);
FakeRandomeAccessFile readable(writable->contents());
TableProperties* props;
Status s =
ReadTableProperties(&readable, writable->contents().size(),
magic_number, Env::Default(), nullptr, &props);
ASSERT_OK(s);
std::unique_ptr<TableProperties> props_guard(props);
auto user_collected = props->user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(4u, deleted);
std::unique_ptr<TableProperties> props_guard(props);
auto user_collected = props->user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(4u, deleted);
if (sanitized) {
uint32_t starts_with_A = 0;
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(1u, starts_with_A);
if (sanitized) {
uint32_t starts_with_A = 0;
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(1u, starts_with_A);
}
}
}
} // namespace

View File

@ -33,7 +33,7 @@ class MergeOperator;
class Snapshot;
class TableFactory;
class MemTableRepFactory;
class TablePropertiesCollector;
class TablePropertiesCollectorFactory;
class Slice;
class SliceTransform;
class Statistics;
@ -455,11 +455,11 @@ struct ColumnFamilyOptions {
// This option allows user to to collect their own interested statistics of
// the tables.
// Default: emtpy vector -- no user-defined statistics collection will be
// Default: empty vector -- no user-defined statistics collection will be
// performed.
typedef std::vector<std::shared_ptr<TablePropertiesCollector>>
TablePropertiesCollectors;
TablePropertiesCollectors table_properties_collectors;
typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
TablePropertiesCollectorFactories;
TablePropertiesCollectorFactories table_properties_collector_factories;
// Allows thread-safe inplace updates.
// If inplace_callback function is not set,

View File

@ -79,7 +79,10 @@ extern const std::string kPropertiesBlock;
// `TablePropertiesCollector` provides the mechanism for users to collect
// their own interested properties. This class is essentially a collection
// of callback functions that will be invoked during table building.
// of callback functions that will be invoked during table building.
// It is construced with TablePropertiesCollectorFactory. The methods don't
// need to be thread-safe, as we will create exactly one
// TablePropertiesCollector object per table and then call it sequentially
class TablePropertiesCollector {
public:
virtual ~TablePropertiesCollector() {}
@ -95,12 +98,24 @@ class TablePropertiesCollector {
// `properties`.
virtual Status Finish(UserCollectedProperties* properties) = 0;
// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;
// Return the human-readable properties, where the key is property name and
// the value is the human-readable form of value.
virtual UserCollectedProperties GetReadableProperties() const = 0;
// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;
};
// Constructs TablePropertiesCollector. Internals create a new
// TablePropertiesCollector for each new table
class TablePropertiesCollectorFactory {
public:
virtual ~TablePropertiesCollectorFactory() {}
// has to be thread-safe
virtual TablePropertiesCollector* CreateTablePropertiesCollector() = 0;
// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;
};
// Extra properties

View File

@ -1,6 +1,17 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
// Also update Makefile if you change these
#define __ROCKSDB_MAJOR__ 3
#define __ROCKSDB_MINOR__ 0
#define __ROCKSDB_PATCH__ 0
#define ROCKSDB_MAJOR 3
#define ROCKSDB_MINOR 1
#define ROCKSDB_PATCH 0
// Do not use these. We made the mistake of declaring macros starting with
// double underscore. Now we have to live with our choice. We'll deprecate these
// at some point
#define __ROCKSDB_MAJOR__ ROCKSDB_MAJOR
#define __ROCKSDB_MINOR__ ROCKSDB_MINOR
#define __ROCKSDB_PATCH__ ROCKSDB_PATCH

View File

@ -305,6 +305,9 @@ struct BlockBasedTableBuilder::Rep {
std::string compressed_output;
std::unique_ptr<FlushBlockPolicy> flush_block_policy;
std::vector<std::unique_ptr<TablePropertiesCollector>>
table_properties_collectors;
Rep(const Options& opt, const InternalKeyComparator& icomparator,
WritableFile* f, FlushBlockPolicyFactory* flush_block_policy_factory,
CompressionType compression_type, IndexType index_block_type,
@ -322,8 +325,13 @@ struct BlockBasedTableBuilder::Rep {
: new FilterBlockBuilder(opt, &internal_comparator)),
flush_block_policy(flush_block_policy_factory->NewFlushBlockPolicy(
options, data_block)) {
options.table_properties_collectors.push_back(
std::make_shared<BlockBasedTablePropertiesCollector>(index_block_type));
for (auto& collector_factories :
options.table_properties_collector_factories) {
table_properties_collectors.emplace_back(
collector_factories->CreateTablePropertiesCollector());
}
table_properties_collectors.emplace_back(
new BlockBasedTablePropertiesCollector(index_block_type));
}
};
@ -391,12 +399,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
r->props.raw_key_size += key.size();
r->props.raw_value_size += value.size();
NotifyCollectTableCollectorsOnAdd(
key,
value,
r->options.table_properties_collectors,
r->options.info_log.get()
);
NotifyCollectTableCollectorsOnAdd(key, value, r->table_properties_collectors,
r->options.info_log.get());
}
void BlockBasedTableBuilder::Flush() {
@ -590,11 +594,9 @@ Status BlockBasedTableBuilder::Finish() {
property_block_builder.AddTableProperty(r->props);
// Add use collected properties
NotifyCollectTableCollectorsOnFinish(
r->options.table_properties_collectors,
r->options.info_log.get(),
&property_block_builder
);
NotifyCollectTableCollectorsOnFinish(r->table_properties_collectors,
r->options.info_log.get(),
&property_block_builder);
BlockHandle properties_block_handle;
WriteRawBlock(
@ -647,7 +649,7 @@ Status BlockBasedTableBuilder::Finish() {
// user collected properties
std::string user_collected;
user_collected.reserve(1024);
for (auto collector : r->options.table_properties_collectors) {
for (const auto& collector : r->table_properties_collectors) {
for (const auto& prop : collector->GetReadableProperties()) {
user_collected.append(prop.first);
user_collected.append("=");

View File

@ -74,6 +74,7 @@ class BlockBasedTableBuilder : public TableBuilder {
const CompressionType type,
const BlockHandle* handle);
struct Rep;
class BlockBasedTablePropertiesCollectorFactory;
class BlockBasedTablePropertiesCollector;
Rep* rep_;

View File

@ -96,12 +96,11 @@ void LogPropertiesCollectionError(
}
bool NotifyCollectTableCollectorsOnAdd(
const Slice& key,
const Slice& value,
const Options::TablePropertiesCollectors& collectors,
const Slice& key, const Slice& value,
const std::vector<std::unique_ptr<TablePropertiesCollector>>& collectors,
Logger* info_log) {
bool all_succeeded = true;
for (auto collector : collectors) {
for (auto& collector : collectors) {
Status s = collector->Add(key, value);
all_succeeded = all_succeeded && s.ok();
if (!s.ok()) {
@ -113,11 +112,10 @@ bool NotifyCollectTableCollectorsOnAdd(
}
bool NotifyCollectTableCollectorsOnFinish(
const Options::TablePropertiesCollectors& collectors,
Logger* info_log,
PropertyBlockBuilder* builder) {
const std::vector<std::unique_ptr<TablePropertiesCollector>>& collectors,
Logger* info_log, PropertyBlockBuilder* builder) {
bool all_succeeded = true;
for (auto collector : collectors) {
for (auto& collector : collectors) {
UserCollectedProperties user_collected_properties;
Status s = collector->Finish(&user_collected_properties);

View File

@ -6,6 +6,7 @@
#include <map>
#include <memory>
#include <vector>
#include <string>
#include "db/builder.h"
@ -91,17 +92,15 @@ void LogPropertiesCollectionError(
// NotifyCollectTableCollectorsOnAdd() triggers the `Add` event for all
// property collectors.
bool NotifyCollectTableCollectorsOnAdd(
const Slice& key,
const Slice& value,
const Options::TablePropertiesCollectors& collectors,
const Slice& key, const Slice& value,
const std::vector<std::unique_ptr<TablePropertiesCollector>>& collectors,
Logger* info_log);
// NotifyCollectTableCollectorsOnAdd() triggers the `Finish` event for all
// property collectors. The collected properties will be added to `builder`.
bool NotifyCollectTableCollectorsOnFinish(
const Options::TablePropertiesCollectors& collectors,
Logger* info_log,
PropertyBlockBuilder* builder);
const std::vector<std::unique_ptr<TablePropertiesCollector>>& collectors,
Logger* info_log, PropertyBlockBuilder* builder);
// Read the properties from the table.
// @returns a status to indicate if the operation succeeded. On success,

View File

@ -65,6 +65,12 @@ PlainTableBuilder::PlainTableBuilder(const Options& options,
properties_.index_size = 0;
properties_.filter_size = 0;
properties_.format_version = 0;
for (auto& collector_factories :
options.table_properties_collector_factories) {
table_properties_collectors_.emplace_back(
collector_factories->CreateTablePropertiesCollector());
}
}
PlainTableBuilder::~PlainTableBuilder() {
@ -122,12 +128,8 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
properties_.raw_value_size += value.size();
// notify property collectors
NotifyCollectTableCollectorsOnAdd(
key,
value,
options_.table_properties_collectors,
options_.info_log.get()
);
NotifyCollectTableCollectorsOnAdd(key, value, table_properties_collectors_,
options_.info_log.get());
}
Status PlainTableBuilder::status() const { return status_; }
@ -149,11 +151,9 @@ Status PlainTableBuilder::Finish() {
property_block_builder.AddTableProperty(properties_);
// -- Add user collected properties
NotifyCollectTableCollectorsOnFinish(
options_.table_properties_collectors,
options_.info_log.get(),
&property_block_builder
);
NotifyCollectTableCollectorsOnFinish(table_properties_collectors_,
options_.info_log.get(),
&property_block_builder);
// -- Write property block
BlockHandle property_block_handle;

View File

@ -5,9 +5,10 @@
// IndexedTable is a simple table format for UNIT TEST ONLY. It is not built
// as production quality.
#ifndef ROCKSDB_LITE
#pragma once
#ifndef ROCKSDB_LITE
#include <stdint.h>
#include <vector>
#include "rocksdb/options.h"
#include "rocksdb/status.h"
#include "table/table_builder.h"
@ -62,6 +63,8 @@ public:
private:
Options options_;
std::vector<std::unique_ptr<TablePropertiesCollector>>
table_properties_collectors_;
WritableFile* file_;
uint64_t offset_ = 0;
Status status_;

View File

@ -140,7 +140,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
options.max_sequential_skip_in_iterations),
memtable_factory(options.memtable_factory),
table_factory(options.table_factory),
table_properties_collectors(options.table_properties_collectors),
table_properties_collector_factories(
options.table_properties_collector_factories),
inplace_update_support(options.inplace_update_support),
inplace_update_num_locks(options.inplace_update_num_locks),
inplace_callback(options.inplace_callback),
@ -413,8 +414,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
"Options.compaction_options_universal.compression_size_percent: %u",
compaction_options_universal.compression_size_percent);
std::string collector_names;
for (auto collector : table_properties_collectors) {
collector_names.append(collector->Name());
for (const auto& collector_factory : table_properties_collector_factories) {
collector_names.append(collector_factory->Name());
collector_names.append("; ");
}
Log(log, " Options.table_properties_collectors: %s",