rocksdb/table/table_properties.cc
Andrew Kryczka 843d2e3137 Shared dictionary compression using reference block
Summary:
This adds a new metablock containing a shared dictionary that is used
to compress all data blocks in the SST file. The size of the shared dictionary
is configurable in CompressionOptions and defaults to 0. It's currently only
used for zlib/lz4/lz4hc, but the block will be stored in the SST regardless of
the compression type if the user chooses a nonzero dictionary size.

During compaction, computes the dictionary by randomly sampling the first
output file in each subcompaction. It pre-computes the intervals to sample
by assuming the output file will have the maximum allowable length. In case
the file is smaller, some of the pre-computed sampling intervals can be beyond
end-of-file, in which case we skip over those samples and the dictionary will
be a bit smaller. After the dictionary is generated using the first file in a
subcompaction, it is loaded into the compression library before writing each
block in each subsequent file of that subcompaction.

On the read path, gets the dictionary from the metablock, if it exists. Then,
loads that dictionary into the compression library before reading each block.

Test Plan: new unit test

Reviewers: yhchiang, IslamAbdelRahman, cyan, sdong

Reviewed By: sdong

Subscribers: andrewkr, yoshinorim, kradhakrishnan, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D52287
2016-04-27 17:36:03 -07:00

181 lines
6.6 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include "table/table_properties_internal.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/iterator.h"
#include "rocksdb/env.h"
#include "port/port.h"
#include "table/internal_iterator.h"
#include "util/string_util.h"
namespace rocksdb {
const uint32_t TablePropertiesCollectorFactory::Context::kUnknownColumnFamily =
port::kMaxInt32;
namespace {
void AppendProperty(
std::string& props,
const std::string& key,
const std::string& value,
const std::string& prop_delim,
const std::string& kv_delim) {
props.append(key);
props.append(kv_delim);
props.append(value);
props.append(prop_delim);
}
template <class TValue>
void AppendProperty(
std::string& props,
const std::string& key,
const TValue& value,
const std::string& prop_delim,
const std::string& kv_delim) {
AppendProperty(
props, key, ToString(value), prop_delim, kv_delim
);
}
// Seek to the specified meta block.
// Return true if it successfully seeks to that block.
Status SeekToMetaBlock(InternalIterator* meta_iter,
const std::string& block_name, bool* is_found) {
*is_found = true;
meta_iter->Seek(block_name);
if (meta_iter->status().ok() &&
(!meta_iter->Valid() || meta_iter->key() != block_name)) {
*is_found = false;
}
return meta_iter->status();
}
}
std::string TableProperties::ToString(
const std::string& prop_delim,
const std::string& kv_delim) const {
std::string result;
result.reserve(1024);
// Basic Info
AppendProperty(result, "# data blocks", num_data_blocks, prop_delim,
kv_delim);
AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim);
AppendProperty(result, "raw key size", raw_key_size, prop_delim, kv_delim);
AppendProperty(result, "raw average key size",
num_entries != 0 ? 1.0 * raw_key_size / num_entries : 0.0,
prop_delim, kv_delim);
AppendProperty(result, "raw value size", raw_value_size, prop_delim,
kv_delim);
AppendProperty(result, "raw average value size",
num_entries != 0 ? 1.0 * raw_value_size / num_entries : 0.0,
prop_delim, kv_delim);
AppendProperty(result, "data block size", data_size, prop_delim, kv_delim);
AppendProperty(result, "index block size", index_size, prop_delim, kv_delim);
AppendProperty(result, "filter block size", filter_size, prop_delim,
kv_delim);
AppendProperty(result, "(estimated) table size",
data_size + index_size + filter_size, prop_delim, kv_delim);
AppendProperty(
result, "filter policy name",
filter_policy_name.empty() ? std::string("N/A") : filter_policy_name,
prop_delim, kv_delim);
AppendProperty(result, "column family ID",
column_family_id == rocksdb::TablePropertiesCollectorFactory::
Context::kUnknownColumnFamily
? std::string("N/A")
: rocksdb::ToString(column_family_id),
prop_delim, kv_delim);
AppendProperty(
result, "column family name",
column_family_name.empty() ? std::string("N/A") : column_family_name,
prop_delim, kv_delim);
AppendProperty(result, "comparator name",
comparator_name.empty() ? std::string("N/A") : comparator_name,
prop_delim, kv_delim);
AppendProperty(
result, "merge operator name",
merge_operator_name.empty() ? std::string("N/A") : merge_operator_name,
prop_delim, kv_delim);
AppendProperty(result, "property collectors names",
property_collectors_names.empty() ? std::string("N/A")
: property_collectors_names,
prop_delim, kv_delim);
return result;
}
void TableProperties::Add(const TableProperties& tp) {
data_size += tp.data_size;
index_size += tp.index_size;
filter_size += tp.filter_size;
raw_key_size += tp.raw_key_size;
raw_value_size += tp.raw_value_size;
num_data_blocks += tp.num_data_blocks;
num_entries += tp.num_entries;
}
const std::string TablePropertiesNames::kDataSize =
"rocksdb.data.size";
const std::string TablePropertiesNames::kIndexSize =
"rocksdb.index.size";
const std::string TablePropertiesNames::kFilterSize =
"rocksdb.filter.size";
const std::string TablePropertiesNames::kRawKeySize =
"rocksdb.raw.key.size";
const std::string TablePropertiesNames::kRawValueSize =
"rocksdb.raw.value.size";
const std::string TablePropertiesNames::kNumDataBlocks =
"rocksdb.num.data.blocks";
const std::string TablePropertiesNames::kNumEntries =
"rocksdb.num.entries";
const std::string TablePropertiesNames::kFilterPolicy =
"rocksdb.filter.policy";
const std::string TablePropertiesNames::kFormatVersion =
"rocksdb.format.version";
const std::string TablePropertiesNames::kFixedKeyLen =
"rocksdb.fixed.key.length";
const std::string TablePropertiesNames::kColumnFamilyId =
"rocksdb.column.family.id";
const std::string TablePropertiesNames::kColumnFamilyName =
"rocksdb.column.family.name";
const std::string TablePropertiesNames::kComparator = "rocksdb.comparator";
const std::string TablePropertiesNames::kMergeOperator =
"rocksdb.merge.operator";
const std::string TablePropertiesNames::kPropertyCollectors =
"rocksdb.property.collectors";
extern const std::string kPropertiesBlock = "rocksdb.properties";
// Old property block name for backward compatibility
extern const std::string kPropertiesBlockOldName = "rocksdb.stats";
extern const std::string kCompressionDictBlock = "rocksdb.compression_dict";
// Seek to the properties block.
// Return true if it successfully seeks to the properties block.
Status SeekToPropertiesBlock(InternalIterator* meta_iter, bool* is_found) {
Status status = SeekToMetaBlock(meta_iter, kPropertiesBlock, is_found);
if (!*is_found && status.ok()) {
status = SeekToMetaBlock(meta_iter, kPropertiesBlockOldName, is_found);
}
return status;
}
// Seek to the compression dictionary block.
// Return true if it successfully seeks to that block.
Status SeekToCompressionDictBlock(InternalIterator* meta_iter, bool* is_found) {
return SeekToMetaBlock(meta_iter, kCompressionDictBlock, is_found);
}
} // namespace rocksdb