2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2015-09-23 21:42:43 +02:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#include "rocksdb/sst_file_writer.h"
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include "db/dbformat.h"
|
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "table/block_based_table_builder.h"
|
|
|
|
#include "util/file_reader_writer.h"
|
|
|
|
#include "util/string_util.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
// Key of the user-collected table property under which the SstFileWriter
// properties collector in this file stores the external SST file version.
const std::string ExternalSstFilePropertyNames::kVersion =
|
|
|
|
"rocksdb.external_sst_file.version";
|
|
|
|
|
|
|
|
// PropertiesCollector used to add properties specific to tables
|
|
|
|
// generated by SstFileWriter
|
|
|
|
class SstFileWriter::SstFileWriterPropertiesCollector
|
|
|
|
: public IntTblPropCollector {
|
|
|
|
public:
|
|
|
|
explicit SstFileWriterPropertiesCollector(int32_t version)
|
|
|
|
: version_(version) {}
|
|
|
|
|
|
|
|
virtual Status InternalAdd(const Slice& key, const Slice& value,
|
|
|
|
uint64_t file_size) override {
|
|
|
|
// Intentionally left blank. Have no interest in collecting stats for
|
|
|
|
// individual key/value pairs.
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual Status Finish(UserCollectedProperties* properties) override {
|
|
|
|
std::string version_val;
|
|
|
|
PutFixed32(&version_val, static_cast<int32_t>(version_));
|
|
|
|
properties->insert({ExternalSstFilePropertyNames::kVersion, version_val});
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual const char* Name() const override {
|
|
|
|
return "SstFileWriterPropertiesCollector";
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual UserCollectedProperties GetReadableProperties() const override {
|
|
|
|
return {{ExternalSstFilePropertyNames::kVersion, ToString(version_)}};
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
int32_t version_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class SstFileWriter::SstFileWriterPropertiesCollectorFactory
|
|
|
|
: public IntTblPropCollectorFactory {
|
|
|
|
public:
|
|
|
|
explicit SstFileWriterPropertiesCollectorFactory(int32_t version)
|
|
|
|
: version_(version) {}
|
|
|
|
|
2015-10-09 01:57:35 +02:00
|
|
|
virtual IntTblPropCollector* CreateIntTblPropCollector(
|
|
|
|
uint32_t column_family_id) override {
|
2015-09-23 21:42:43 +02:00
|
|
|
return new SstFileWriterPropertiesCollector(version_);
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual const char* Name() const override {
|
|
|
|
return "SstFileWriterPropertiesCollector";
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
int32_t version_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Private state of an SstFileWriter.
struct SstFileWriter::Rep {
  // Captures the options and comparator the writer works with. The file
  // writer and table builder stay empty until SstFileWriter::Open() runs.
  Rep(const EnvOptions& env_opts, const Options& opts,
      const Comparator* user_cmp)
      : env_options(env_opts),
        ioptions(opts),
        mutable_cf_options(opts, ioptions),
        internal_comparator(user_cmp) {}

  // Destination file wrapper and the table builder writing into it.
  std::unique_ptr<WritableFileWriter> file_writer;
  std::unique_ptr<TableBuilder> builder;

  // NOTE: keep `ioptions` declared before `mutable_cf_options`; the latter is
  // constructed from the former in the initializer list above.
  EnvOptions env_options;
  ImmutableCFOptions ioptions;
  MutableCFOptions mutable_cf_options;
  InternalKeyComparator internal_comparator;

  // Bookkeeping about the file being written (path, size, entry count,
  // smallest/largest user key, ...).
  ExternalSstFileInfo file_info;
  // Passed to the table builder options in Open().
  std::string column_family_name;
  // Scratch internal key reused by every Add() call.
  InternalKey ikey;
};
|
|
|
|
|
|
|
|
// Allocates the writer's private state from the given environment options,
// options, and user-key comparator. No file is created until Open() is
// called.
SstFileWriter::SstFileWriter(const EnvOptions& env_options,
                             const Options& options,
                             const Comparator* user_comparator)
    : rep_(new Rep(env_options, options, user_comparator)) {}
|
2015-09-23 21:42:43 +02:00
|
|
|
|
|
|
|
// Frees the private state allocated by the constructor.
SstFileWriter::~SstFileWriter() { delete rep_; }
|
|
|
|
|
|
|
|
// Creates the file at `file_path` and prepares a table builder that writes
// into it.
//
// The compression type is chosen in this order:
//   1. ioptions.bottommost_compression, unless it is
//      kDisableCompressionOption;
//   2. the last entry of ioptions.compression_per_level, if non-empty;
//   3. mutable_cf_options.compression.
//
// The builder is given one internal collector factory that records the
// SstFileWriter version, followed by one wrapper per user-supplied
// table-properties collector factory.
//
// Returns the status of Env::NewWritableFile(); when that fails nothing else
// is modified. On success the file-info bookkeeping is reset for the new
// file.
Status SstFileWriter::Open(const std::string& file_path) {
  Rep* r = rep_;
  Status s;
  std::unique_ptr<WritableFile> sst_file;
  s = r->ioptions.env->NewWritableFile(file_path, &sst_file, r->env_options);
  if (!s.ok()) {
    return s;
  }

  CompressionType compression_type;
  if (r->ioptions.bottommost_compression != kDisableCompressionOption) {
    compression_type = r->ioptions.bottommost_compression;
  } else if (!r->ioptions.compression_per_level.empty()) {
    // Use the compression of the last level if we have per level compression
    compression_type = *(r->ioptions.compression_per_level.rbegin());
  } else {
    compression_type = r->mutable_cf_options.compression;
  }

  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;

  // SstFileWriter properties collector to add SstFileWriter version.
  int_tbl_prop_collector_factories.emplace_back(
      new SstFileWriterPropertiesCollectorFactory(1 /* version */));

  // User collector factories. Iterate over the configured container in place
  // rather than taking a copy of it first.
  for (const auto& user_collector_factory :
       r->ioptions.table_properties_collector_factories) {
    int_tbl_prop_collector_factories.emplace_back(
        new UserKeyTablePropertiesCollectorFactory(user_collector_factory));
  }

  TableBuilderOptions table_builder_options(
      r->ioptions, r->internal_comparator, &int_tbl_prop_collector_factories,
      compression_type, r->ioptions.compression_opts,
      nullptr /* compression_dict */, false /* skip_filters */,
      r->column_family_name);
  r->file_writer.reset(
      new WritableFileWriter(std::move(sst_file), r->env_options));
  r->builder.reset(r->ioptions.table_factory->NewTableBuilder(
      table_builder_options,
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      r->file_writer.get()));

  // Start the bookkeeping for the new file from scratch.
  r->file_info.file_path = file_path;
  r->file_info.file_size = 0;
  r->file_info.num_entries = 0;
  r->file_info.sequence_number = 0;
  r->file_info.version = 1;
  return s;
}
|
|
|
|
|
|
|
|
// Appends one key/value pair to the file being written.
//
// Keys must arrive in strictly increasing order according to the user
// comparator; the entry is stored as a Put with sequence number 0.
//
// Returns:
//   - InvalidArgument("File is not opened") if there is no active builder;
//   - InvalidArgument("Keys must be added in order") if `user_key` does not
//     sort after the previously added key;
//   - the builder's error status if it reports a failure after the entry was
//     handed to it (such an entry is not counted in the file info);
//   - OK otherwise.
Status SstFileWriter::Add(const Slice& user_key, const Slice& value) {
  Rep* r = rep_;
  if (!r->builder) {
    return Status::InvalidArgument("File is not opened");
  }

  if (r->file_info.num_entries == 0) {
    r->file_info.smallest_key.assign(user_key.data(), user_key.size());
  } else {
    if (r->internal_comparator.user_comparator()->Compare(
            user_key, r->file_info.largest_key) <= 0) {
      // Make sure that keys are added in order
      return Status::InvalidArgument("Keys must be added in order");
    }
  }

  r->ikey.Set(user_key, 0 /* Sequence Number */,
              ValueType::kTypeValue /* Put */);
  r->builder->Add(r->ikey.Encode(), value);

  // TableBuilder::Add() returns void, so ask the builder for its status
  // instead of unconditionally reporting success.
  Status s = r->builder->status();
  if (!s.ok()) {
    return s;
  }

  // Update file info only after the entry has been handed to the builder so
  // that file_size accounts for it.
  r->file_info.num_entries++;
  r->file_info.largest_key.assign(user_key.data(), user_key.size());
  r->file_info.file_size = r->builder->FileSize();

  return s;
}
|
|
|
|
|
|
|
|
// Completes the file started by Open().
//
// Returns InvalidArgument, leaving all state untouched, when no file is open
// or when no entries have been added. Otherwise:
//   - finishes the table builder; if that fails the builder is abandoned;
//   - on success, syncs the file (unless ioptions.disable_data_sync is set)
//     and then closes it;
//   - if any step failed, asks the Env to delete the file;
//   - if everything succeeded and `file_info` is non-null, refreshes the
//     recorded file size and copies the file info into `*file_info`.
// The builder is released before returning in every non-early-return path.
Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
  Rep* const rep = rep_;
  if (!rep->builder) {
    return Status::InvalidArgument("File is not opened");
  }
  if (rep->file_info.num_entries == 0) {
    return Status::InvalidArgument("Cannot create sst file with no entries");
  }

  Status status = rep->builder->Finish();
  if (!status.ok()) {
    rep->builder->Abandon();
  } else {
    if (!rep->ioptions.disable_data_sync) {
      status = rep->file_writer->Sync(rep->ioptions.use_fsync);
    }
    if (status.ok()) {
      status = rep->file_writer->Close();
    }
  }

  if (!status.ok()) {
    // Do not leave a partially written file behind.
    rep->ioptions.env->DeleteFile(rep->file_info.file_path);
  } else if (file_info != nullptr) {
    rep->file_info.file_size = rep->builder->FileSize();
    *file_info = rep->file_info;
  }

  rep->builder.reset();
  return status;
}
|
|
|
|
} // namespace rocksdb
|