2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2014-11-08 02:23:58 +01:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
2014-11-12 22:05:12 +01:00
|
|
|
#ifndef ROCKSDB_LITE
|
2014-11-08 02:23:58 +01:00
|
|
|
|
2015-10-15 02:08:28 +02:00
|
|
|
#include "tools/sst_dump_tool_imp.h"
|
2014-11-08 02:23:58 +01:00
|
|
|
|
2014-11-09 19:01:50 +01:00
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#endif
|
|
|
|
|
2014-11-08 02:23:58 +01:00
|
|
|
#include <inttypes.h>
|
2016-05-07 01:09:09 +02:00
|
|
|
#include <iostream>
|
2016-01-13 03:20:06 +01:00
|
|
|
#include <map>
|
|
|
|
#include <sstream>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "db/memtable.h"
|
|
|
|
#include "db/write_batch_internal.h"
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/immutable_options.h"
|
|
|
|
#include "rocksdb/iterator.h"
|
|
|
|
#include "rocksdb/slice_transform.h"
|
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "rocksdb/table_properties.h"
|
2016-05-07 01:09:09 +02:00
|
|
|
#include "rocksdb/utilities/ldb_cmd.h"
|
2016-01-13 03:20:06 +01:00
|
|
|
#include "table/block.h"
|
|
|
|
#include "table/block_based_table_builder.h"
|
|
|
|
#include "table/block_based_table_factory.h"
|
|
|
|
#include "table/block_builder.h"
|
|
|
|
#include "table/format.h"
|
|
|
|
#include "table/meta_blocks.h"
|
|
|
|
#include "table/plain_table_factory.h"
|
2016-05-07 01:09:09 +02:00
|
|
|
#include "table/table_reader.h"
|
2016-01-13 03:20:06 +01:00
|
|
|
#include "util/random.h"
|
2016-05-03 17:46:24 +02:00
|
|
|
#include "util/compression.h"
|
2016-01-13 03:20:06 +01:00
|
|
|
|
2015-09-01 03:35:12 +02:00
|
|
|
#include "port/port.h"
|
2014-11-08 02:23:58 +01:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
2015-02-26 01:34:26 +01:00
|
|
|
using std::dynamic_pointer_cast;
|
|
|
|
|
2014-11-08 02:23:58 +01:00
|
|
|
// Builds a reader for the SST file at `file_path`.
//
// verify_checksum: forwarded to the ReadOptions used by ReadSequential().
// output_hex:      when true, ReadSequential() prints keys/values in hex.
//
// The table reader is set up right away; the resulting Status is kept in
// init_result_ instead of being reported from the constructor.
SstFileReader::SstFileReader(const std::string& file_path,
                             bool verify_checksum,
                             bool output_hex)
    : file_name_(file_path),
      read_num_(0),
      verify_checksum_(verify_checksum),
      output_hex_(output_hex),
      ioptions_(options_),
      internal_comparator_(BytewiseComparator()) {
  // Announce the file before any attempt to open it.
  fprintf(stdout, "Process %s\n", file_path.c_str());

  init_result_ = GetTableReader(file_name_);
}
|
|
|
|
|
2014-11-09 19:01:50 +01:00
|
|
|
// Magic numbers that identify the on-disk table formats this tool handles.
// They are only declared here (extern, no initializer); the definitions are
// provided by another translation unit.
extern const uint64_t kBlockBasedTableMagicNumber;
|
|
|
|
extern const uint64_t kLegacyBlockBasedTableMagicNumber;
|
|
|
|
extern const uint64_t kPlainTableMagicNumber;
|
|
|
|
extern const uint64_t kLegacyPlainTableMagicNumber;
|
2014-11-08 02:23:58 +01:00
|
|
|
|
2015-07-24 02:05:33 +02:00
|
|
|
// Name of the scratch file that CalculateCompressedTableSize() creates in,
// and afterwards deletes from, the Env it obtains via NewMemEnv().
const char* testFileName = "test_file_name";
|
|
|
|
|
2015-02-26 01:34:26 +01:00
|
|
|
// Opens `file_path`, reads its footer to learn the table magic number,
// configures options_ for the detected table format and finally creates
// table_reader_.
//
// Returns the first non-OK Status encountered while opening the file,
// querying its size, reading the footer, re-opening it for plain tables or
// creating the table reader; Status::OK() otherwise.
Status SstFileReader::GetTableReader(const std::string& file_path) {
  // Warning about 'magic_number' being uninitialized shows up only in UBsan
  // builds. Though access is guarded by 's.ok()' checks, fix the issue to
  // avoid any warnings.
  uint64_t magic_number = Footer::kInvalidTableMagicNumber;

  // read table magic number
  Footer footer;

  unique_ptr<RandomAccessFile> file;
  // Zero-initialized so it never carries an indeterminate value, even when
  // GetFileSize() is skipped or fails.
  uint64_t file_size = 0;

  Status s = options_.env->NewRandomAccessFile(file_path, &file, soptions_);
  if (s.ok()) {
    s = options_.env->GetFileSize(file_path, &file_size);
  }

  // The wrapper is installed unconditionally (as before); it is only used
  // below when `s` is still OK.
  file_.reset(new RandomAccessFileReader(std::move(file)));

  if (s.ok()) {
    s = ReadFooterFromFile(file_.get(), file_size, &footer);
  }
  if (s.ok()) {
    magic_number = footer.table_magic_number();
  }

  if (s.ok() && (magic_number == kPlainTableMagicNumber ||
                 magic_number == kLegacyPlainTableMagicNumber)) {
    // Plain tables are read through mmap, so the file is opened a second
    // time with use_mmap_reads enabled. A failure of this second open is now
    // reported instead of silently wrapping whatever `file` holds.
    soptions_.use_mmap_reads = true;
    s = options_.env->NewRandomAccessFile(file_path, &file, soptions_);
    if (s.ok()) {
      file_.reset(new RandomAccessFileReader(std::move(file)));
    }
  }

  if (s.ok()) {
    options_.comparator = &internal_comparator_;
    // For old sst format, ReadTableProperties might fail but file can be read
    if (ReadTableProperties(magic_number, file_.get(), file_size).ok()) {
      SetTableOptionsByMagicNumber(magic_number);
    } else {
      SetOldTableOptions();
    }
  }

  if (s.ok()) {
    s = NewTableReader(ioptions_, soptions_, internal_comparator_, file_size,
                       &table_reader_);
  }
  return s;
}
|
|
|
|
|
2015-02-26 01:34:26 +01:00
|
|
|
// Creates a table reader for the already opened file_ using the table
// factory currently stored in options_.
//
// ioptions, soptions, internal_comparator: bundled into the
//     TableReaderOptions handed to the factory.
// file_size:    size of the file being opened.
// table_reader: receives the newly created reader.
//
// Ownership of file_ is moved into the factory call. Returns the Status
// produced by the factory.
Status SstFileReader::NewTableReader(
    const ImmutableCFOptions& ioptions, const EnvOptions& soptions,
    const InternalKeyComparator& internal_comparator, uint64_t file_size,
    unique_ptr<TableReader>* table_reader) {
  // We need to turn off pre-fetching of index and filter nodes for
  // BlockBasedTable
  shared_ptr<BlockBasedTableFactory> block_table_factory =
      dynamic_pointer_cast<BlockBasedTableFactory>(options_.table_factory);

  if (block_table_factory) {
    return block_table_factory->NewTableReader(
        TableReaderOptions(ioptions, soptions, internal_comparator,
                           /*skip_filters=*/false),
        std::move(file_), file_size, table_reader, /*enable_prefetch=*/false);
  }

  // For all other factory implementation
  return options_.table_factory->NewTableReader(
      TableReaderOptions(ioptions, soptions, internal_comparator),
      std::move(file_), file_size, table_reader);
}
|
|
|
|
|
2014-12-23 22:24:07 +01:00
|
|
|
// Writes a dump of the whole table to a new file named `out_filename`,
// created through Env::Default().
//
// Returns init_result_ when no table reader is available, the error from
// creating the output file, the error from dumping, or - if dumping
// succeeded - the result of closing the output file.
Status SstFileReader::DumpTable(const std::string& out_filename) {
  if (!table_reader_) {
    // Same convention as ReadSequential(): report why setup failed.
    return init_result_;
  }

  unique_ptr<WritableFile> out_file;
  Env* env = Env::Default();
  Status s = env->NewWritableFile(out_filename, &out_file, soptions_);
  if (!s.ok()) {
    // out_file was not created; using it below would dereference null.
    return s;
  }

  s = table_reader_->DumpTable(out_file.get());

  // Always close the file, but do not let a close error mask a dump error.
  Status close_status = out_file->Close();
  if (s.ok()) {
    s = close_status;
  }
  return s;
}
|
|
|
|
|
2015-07-24 02:05:33 +02:00
|
|
|
uint64_t SstFileReader::CalculateCompressedTableSize(
|
|
|
|
const TableBuilderOptions& tb_options, size_t block_size) {
|
|
|
|
unique_ptr<WritableFile> out_file;
|
|
|
|
unique_ptr<Env> env(NewMemEnv(Env::Default()));
|
|
|
|
env->NewWritableFile(testFileName, &out_file, soptions_);
|
|
|
|
unique_ptr<WritableFileWriter> dest_writer;
|
|
|
|
dest_writer.reset(new WritableFileWriter(std::move(out_file), soptions_));
|
|
|
|
BlockBasedTableOptions table_options;
|
|
|
|
table_options.block_size = block_size;
|
|
|
|
BlockBasedTableFactory block_based_tf(table_options);
|
2015-08-04 17:42:34 +02:00
|
|
|
unique_ptr<TableBuilder> table_builder;
|
|
|
|
table_builder.reset(block_based_tf.NewTableBuilder(
|
2015-10-09 01:57:35 +02:00
|
|
|
tb_options,
|
|
|
|
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
|
|
|
|
dest_writer.get()));
|
2015-10-13 00:06:38 +02:00
|
|
|
unique_ptr<InternalIterator> iter(table_reader_->NewIterator(ReadOptions()));
|
2015-07-24 02:05:33 +02:00
|
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
|
|
if (!iter->status().ok()) {
|
2015-07-31 02:46:47 +02:00
|
|
|
fputs(iter->status().ToString().c_str(), stderr);
|
2015-07-24 02:05:33 +02:00
|
|
|
exit(1);
|
|
|
|
}
|
2015-08-04 17:42:34 +02:00
|
|
|
table_builder->Add(iter->key(), iter->value());
|
2015-07-24 02:05:33 +02:00
|
|
|
}
|
2015-08-04 17:42:34 +02:00
|
|
|
Status s = table_builder->Finish();
|
2015-07-24 02:05:33 +02:00
|
|
|
if (!s.ok()) {
|
2015-07-31 02:46:47 +02:00
|
|
|
fputs(s.ToString().c_str(), stderr);
|
2015-07-24 02:05:33 +02:00
|
|
|
exit(1);
|
|
|
|
}
|
2015-08-04 17:42:34 +02:00
|
|
|
uint64_t size = table_builder->FileSize();
|
2015-07-24 02:05:33 +02:00
|
|
|
env->DeleteFile(testFileName);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
|
|
|
|
ReadOptions read_options;
|
|
|
|
Options opts;
|
|
|
|
const ImmutableCFOptions imoptions(opts);
|
|
|
|
rocksdb::InternalKeyComparator ikc(opts.comparator);
|
|
|
|
std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
|
|
|
|
block_based_table_factories;
|
|
|
|
|
2015-09-01 03:35:12 +02:00
|
|
|
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
2015-07-24 02:05:33 +02:00
|
|
|
|
2016-04-20 07:54:24 +02:00
|
|
|
std::pair<CompressionType,const char*> compressions[] = {
|
|
|
|
{ CompressionType::kNoCompression, "kNoCompression" },
|
|
|
|
{ CompressionType::kSnappyCompression, "kSnappyCompression" },
|
|
|
|
{ CompressionType::kZlibCompression, "kZlibCompression" },
|
|
|
|
{ CompressionType::kBZip2Compression, "kBZip2Compression" },
|
Shared dictionary compression using reference block
Summary:
This adds a new metablock containing a shared dictionary that is used
to compress all data blocks in the SST file. The size of the shared dictionary
is configurable in CompressionOptions and defaults to 0. It's currently only
used for zlib/lz4/lz4hc, but the block will be stored in the SST regardless of
the compression type if the user chooses a nonzero dictionary size.
During compaction, computes the dictionary by randomly sampling the first
output file in each subcompaction. It pre-computes the intervals to sample
by assuming the output file will have the maximum allowable length. In case
the file is smaller, some of the pre-computed sampling intervals can be beyond
end-of-file, in which case we skip over those samples and the dictionary will
be a bit smaller. After the dictionary is generated using the first file in a
subcompaction, it is loaded into the compression library before writing each
block in each subsequent file of that subcompaction.
On the read path, gets the dictionary from the metablock, if it exists. Then,
loads that dictionary into the compression library before reading each block.
Test Plan: new unit test
Reviewers: yhchiang, IslamAbdelRahman, cyan, sdong
Reviewed By: sdong
Subscribers: andrewkr, yoshinorim, kradhakrishnan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D52287
2016-04-28 02:36:03 +02:00
|
|
|
{ CompressionType::kLZ4Compression, "kLZ4Compression" },
|
2016-04-20 07:54:24 +02:00
|
|
|
{ CompressionType::kLZ4HCCompression, "kLZ4HCCompression" },
|
|
|
|
{ CompressionType::kXpressCompression, "kXpressCompression" },
|
|
|
|
{ CompressionType::kZSTDNotFinalCompression, "kZSTDNotFinalCompression" }
|
|
|
|
};
|
|
|
|
|
|
|
|
for (auto& i : compressions) {
|
2016-05-03 17:46:24 +02:00
|
|
|
if (CompressionTypeSupported(i.first)) {
|
|
|
|
CompressionOptions compress_opt;
|
|
|
|
std::string column_family_name;
|
|
|
|
TableBuilderOptions tb_opts(imoptions, ikc, &block_based_table_factories,
|
|
|
|
i.first, compress_opt,
|
|
|
|
nullptr /* compression_dict */,
|
|
|
|
false /* skip_filters */, column_family_name);
|
|
|
|
uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size);
|
|
|
|
fprintf(stdout, "Compression: %s", i.second);
|
|
|
|
fprintf(stdout, " Size: %" PRIu64 "\n", file_size);
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
|
|
|
|
}
|
2015-07-24 02:05:33 +02:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-08 02:23:58 +01:00
|
|
|
// Loads the table properties of `file` (of size `file_size`, with the given
// magic number) through rocksdb::ReadTableProperties().
//
// On success table_properties_ takes ownership of the loaded properties; on
// failure a note is printed to stdout. The Status of the read is returned
// in both cases.
Status SstFileReader::ReadTableProperties(uint64_t table_magic_number,
                                          RandomAccessFileReader* file,
                                          uint64_t file_size) {
  TableProperties* loaded_props = nullptr;
  Status read_status = rocksdb::ReadTableProperties(
      file, file_size, table_magic_number, options_.env,
      options_.info_log.get(), &loaded_props);

  if (!read_status.ok()) {
    fprintf(stdout, "Not able to read table properties\n");
    return read_status;
  }

  table_properties_.reset(loaded_props);
  return read_status;
}
|
|
|
|
|
|
|
|
// Configures options_ (table factory and related settings) according to the
// table format identified by `table_magic_number`. Requires
// table_properties_ to have been loaded.
//
// Returns Status::OK() for block-based and plain tables, and
// Status::InvalidArgument() for any other magic number.
Status SstFileReader::SetTableOptionsByMagicNumber(
    uint64_t table_magic_number) {
  assert(table_properties_);
  if (table_magic_number == kBlockBasedTableMagicNumber ||
      table_magic_number == kLegacyBlockBasedTableMagicNumber) {
    options_.table_factory = std::make_shared<BlockBasedTableFactory>();
    fprintf(stdout, "Sst file format: block-based\n");
    auto& props = table_properties_->user_collected_properties;
    auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
    if (pos != props.end()) {
      auto index_type_on_file = static_cast<BlockBasedTableOptions::IndexType>(
          DecodeFixed32(pos->second.c_str()));
      // A hash-search index gets a no-op prefix extractor installed.
      if (index_type_on_file ==
          BlockBasedTableOptions::IndexType::kHashSearch) {
        options_.prefix_extractor.reset(NewNoopTransform());
      }
    }
  } else if (table_magic_number == kPlainTableMagicNumber ||
             table_magic_number == kLegacyPlainTableMagicNumber) {
    options_.allow_mmap_reads = true;

    PlainTableOptions plain_table_options;
    plain_table_options.user_key_len = kPlainTableVariableLength;
    plain_table_options.bloom_bits_per_key = 0;
    plain_table_options.hash_table_ratio = 0;
    plain_table_options.index_sparseness = 1;
    plain_table_options.huge_page_tlb_size = 0;
    plain_table_options.encoding_type = kPlain;
    plain_table_options.full_scan_mode = true;

    options_.table_factory.reset(NewPlainTableFactory(plain_table_options));
    fprintf(stdout, "Sst file format: plain table\n");
  } else {
    char error_msg_buffer[80];
    // PRIx64 prints the full 64-bit value on every platform; "%lx" with a
    // (long) cast loses the upper half where long is 32 bits wide.
    snprintf(error_msg_buffer, sizeof(error_msg_buffer),
             "Unsupported table magic number --- %" PRIx64,
             table_magic_number);
    return Status::InvalidArgument(error_msg_buffer);
  }

  return Status::OK();
}
|
|
|
|
|
|
|
|
// Fallback used when no table properties could be loaded: selects a
// default-constructed block-based table factory and reports that choice on
// stdout. Always returns Status::OK().
Status SstFileReader::SetOldTableOptions() {
  // This path is only meant for files whose properties were not read.
  assert(!table_properties_);

  fprintf(stdout, "Sst file format: block-based(old version)\n");
  options_.table_factory = std::make_shared<BlockBasedTableFactory>();
  return Status::OK();
}
|
|
|
|
|
|
|
|
// Walks the table in iterator order and optionally prints each entry.
//
// print_kv:          print "<key> => <value>" for every visited entry.
// read_num:          when > 0, stop once more than this many entries have
//                    been visited.
// has_from/from_key: when has_from is set, start at `from_key` instead of at
//                    the first entry.
// has_to/to_key:     when has_to is set, stop before the first user key that
//                    compares >= `to_key` under BytewiseComparator().
//
// The number of visited entries is added to read_num_. Returns
// init_result_ when no table reader exists, otherwise the iterator's final
// status.
Status SstFileReader::ReadSequential(bool print_kv, uint64_t read_num,
                                     bool has_from,
                                     const std::string& from_key, bool has_to,
                                     const std::string& to_key) {
  if (!table_reader_) {
    return init_result_;
  }

  unique_ptr<InternalIterator> it(
      table_reader_->NewIterator(ReadOptions(verify_checksum_, false)));

  // Position the iterator at the requested starting point.
  if (has_from) {
    InternalKey start_key;
    start_key.SetMaxPossibleForUserKey(from_key);
    it->Seek(start_key.Encode());
  } else {
    it->SeekToFirst();
  }

  uint64_t visited = 0;
  for (; it->Valid(); it->Next()) {
    Slice raw_key = it->key();
    Slice raw_value = it->value();

    // The entry is counted before the limit is tested.
    ++visited;
    if (read_num > 0 && visited > read_num) {
      break;
    }

    ParsedInternalKey parsed;
    if (ParseInternalKey(raw_key, &parsed)) {
      // If end marker was specified, we stop before it
      if (has_to &&
          BytewiseComparator()->Compare(parsed.user_key, to_key) >= 0) {
        break;
      }

      if (print_kv) {
        fprintf(stdout, "%s => %s\n",
                parsed.DebugString(output_hex_).c_str(),
                raw_value.ToString(output_hex_).c_str());
      }
    } else {
      // Unparsable keys are reported and skipped.
      std::cerr << "Internal Key ["
                << raw_key.ToString(true /* in hex*/)
                << "] parse error!\n";
    }
  }

  read_num_ += visited;

  return it->status();
}
|
|
|
|
|
|
|
|
// Hands out the table properties held by the table reader.
//
// *table_properties is only assigned when a table reader exists. The return
// value is init_result_ in every case.
Status SstFileReader::ReadTableProperties(
    std::shared_ptr<const TableProperties>* table_properties) {
  if (table_reader_) {
    *table_properties = table_reader_->GetTableProperties();
  }
  return init_result_;
}
|
|
|
|
|
|
|
|
namespace {

// Prints the command-line usage of sst_dump to stderr.
void print_help() {
  fprintf(stderr,
          R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw]
    --file=<data_dir_OR_sst_file>
      Path to SST file or directory containing SST files

    --command=check|scan|raw
        check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
        scan: Iterate over entries in files and print them to screen
        raw: Dump all the table contents to <file_name>_dump.txt

    --output_hex
      Can be combined with scan command to print the keys and values in Hex

    --from=<user_key>
      Key to start reading from when executing check|scan

    --to=<user_key>
      Key to stop reading at when executing check|scan

    --read_num=<num>
      Maximum number of entries to read when executing check|scan

    --verify_checksum
      Verify file checksum when executing check|scan

    --input_key_hex
      Can be combined with --from and --to to indicate that these values are encoded in Hex

    --show_properties
      Print table properties after iterating over the file

    --show_compression_sizes
      Independent command that will recreate the SST file using 16K block size with different
      compressions and report the size of the file using such compression

    --set_block_size=<block_size>
      Can be combined with --show_compression_sizes to set the block size that will be used
      when trying different compression algorithms
)");
}

}  // namespace
|
|
|
|
|
2014-12-23 22:24:07 +01:00
|
|
|
int SSTDumpTool::Run(int argc, char** argv) {
|
2014-11-08 02:23:58 +01:00
|
|
|
const char* dir_or_file = nullptr;
|
|
|
|
uint64_t read_num = -1;
|
|
|
|
std::string command;
|
|
|
|
|
|
|
|
char junk;
|
|
|
|
uint64_t n;
|
|
|
|
bool verify_checksum = false;
|
|
|
|
bool output_hex = false;
|
|
|
|
bool input_key_hex = false;
|
|
|
|
bool has_from = false;
|
|
|
|
bool has_to = false;
|
|
|
|
bool show_properties = false;
|
2015-07-24 02:05:33 +02:00
|
|
|
bool show_compression_sizes = false;
|
|
|
|
bool set_block_size = false;
|
2014-11-08 02:23:58 +01:00
|
|
|
std::string from_key;
|
|
|
|
std::string to_key;
|
2015-07-24 02:05:33 +02:00
|
|
|
std::string block_size_str;
|
|
|
|
size_t block_size;
|
2014-11-08 02:23:58 +01:00
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
if (strncmp(argv[i], "--file=", 7) == 0) {
|
|
|
|
dir_or_file = argv[i] + 7;
|
|
|
|
} else if (strcmp(argv[i], "--output_hex") == 0) {
|
|
|
|
output_hex = true;
|
|
|
|
} else if (strcmp(argv[i], "--input_key_hex") == 0) {
|
|
|
|
input_key_hex = true;
|
|
|
|
} else if (sscanf(argv[i],
|
|
|
|
"--read_num=%lu%c",
|
|
|
|
(unsigned long*)&n, &junk) == 1) {
|
|
|
|
read_num = n;
|
|
|
|
} else if (strcmp(argv[i], "--verify_checksum") == 0) {
|
|
|
|
verify_checksum = true;
|
|
|
|
} else if (strncmp(argv[i], "--command=", 10) == 0) {
|
|
|
|
command = argv[i] + 10;
|
|
|
|
} else if (strncmp(argv[i], "--from=", 7) == 0) {
|
|
|
|
from_key = argv[i] + 7;
|
|
|
|
has_from = true;
|
|
|
|
} else if (strncmp(argv[i], "--to=", 5) == 0) {
|
|
|
|
to_key = argv[i] + 5;
|
|
|
|
has_to = true;
|
|
|
|
} else if (strcmp(argv[i], "--show_properties") == 0) {
|
|
|
|
show_properties = true;
|
2015-07-24 02:05:33 +02:00
|
|
|
} else if (strcmp(argv[i], "--show_compression_sizes") == 0) {
|
|
|
|
show_compression_sizes = true;
|
|
|
|
} else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
|
|
|
|
set_block_size = true;
|
|
|
|
block_size_str = argv[i] + 17;
|
|
|
|
std::istringstream iss(block_size_str);
|
|
|
|
if (iss.fail()) {
|
|
|
|
fprintf(stderr, "block size must be numeric");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
iss >> block_size;
|
2014-11-08 02:23:58 +01:00
|
|
|
} else {
|
|
|
|
print_help();
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (input_key_hex) {
|
|
|
|
if (has_from) {
|
Remove ldb HexToString method's usage of sscanf
Summary:
Fix hex2String performance issues by removing sscanf dependency.
Also fixed some edge case handling (odd length, bad input).
Test Plan: Created a test file which called old and new implementation, and validated results are the same. I'll paste results in the phabricator diff.
Reviewers: igor, rven, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: thatsafunnyname, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D46785
2015-09-23 23:25:46 +02:00
|
|
|
from_key = rocksdb::LDBCommand::HexToString(from_key);
|
2014-11-08 02:23:58 +01:00
|
|
|
}
|
|
|
|
if (has_to) {
|
Remove ldb HexToString method's usage of sscanf
Summary:
Fix hex2String performance issues by removing sscanf dependency.
Also fixed some edge case handling (odd length, bad input).
Test Plan: Created a test file which called old and new implementation, and validated results are the same. I'll paste results in the phabricator diff.
Reviewers: igor, rven, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: thatsafunnyname, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D46785
2015-09-23 23:25:46 +02:00
|
|
|
to_key = rocksdb::LDBCommand::HexToString(to_key);
|
2014-11-08 02:23:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dir_or_file == nullptr) {
|
|
|
|
print_help();
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> filenames;
|
|
|
|
rocksdb::Env* env = rocksdb::Env::Default();
|
|
|
|
rocksdb::Status st = env->GetChildren(dir_or_file, &filenames);
|
|
|
|
bool dir = true;
|
|
|
|
if (!st.ok()) {
|
|
|
|
filenames.clear();
|
|
|
|
filenames.push_back(dir_or_file);
|
|
|
|
dir = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stdout, "from [%s] to [%s]\n",
|
|
|
|
rocksdb::Slice(from_key).ToString(true).c_str(),
|
|
|
|
rocksdb::Slice(to_key).ToString(true).c_str());
|
|
|
|
|
|
|
|
uint64_t total_read = 0;
|
|
|
|
for (size_t i = 0; i < filenames.size(); i++) {
|
|
|
|
std::string filename = filenames.at(i);
|
|
|
|
if (filename.length() <= 4 ||
|
|
|
|
filename.rfind(".sst") != filename.length() - 4) {
|
|
|
|
// ignore
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (dir) {
|
|
|
|
filename = std::string(dir_or_file) + "/" + filename;
|
|
|
|
}
|
2014-12-23 22:24:07 +01:00
|
|
|
|
2014-11-08 02:23:58 +01:00
|
|
|
rocksdb::SstFileReader reader(filename, verify_checksum,
|
|
|
|
output_hex);
|
2014-12-23 22:24:07 +01:00
|
|
|
if (!reader.getStatus().ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
|
|
|
reader.getStatus().ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2015-07-24 02:05:33 +02:00
|
|
|
if (show_compression_sizes) {
|
|
|
|
if (set_block_size) {
|
|
|
|
reader.ShowAllCompressionSizes(block_size);
|
|
|
|
} else {
|
|
|
|
reader.ShowAllCompressionSizes(16384);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-12-23 22:24:07 +01:00
|
|
|
if (command == "raw") {
|
|
|
|
std::string out_filename = filename.substr(0, filename.length() - 4);
|
|
|
|
out_filename.append("_dump.txt");
|
|
|
|
|
|
|
|
st = reader.DumpTable(out_filename);
|
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "raw dump written to file %s\n", &out_filename[0]);
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-11-08 02:23:58 +01:00
|
|
|
// scan all files in give file path.
|
|
|
|
if (command == "" || command == "scan" || command == "check") {
|
2015-03-23 21:38:17 +01:00
|
|
|
st = reader.ReadSequential(command == "scan",
|
2014-11-08 02:23:58 +01:00
|
|
|
read_num > 0 ? (read_num - total_read) :
|
|
|
|
read_num,
|
|
|
|
has_from, from_key, has_to, to_key);
|
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
|
|
|
st.ToString().c_str());
|
|
|
|
}
|
|
|
|
total_read += reader.GetReadNumber();
|
|
|
|
if (read_num > 0 && total_read > read_num) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (show_properties) {
|
|
|
|
const rocksdb::TableProperties* table_properties;
|
|
|
|
|
|
|
|
std::shared_ptr<const rocksdb::TableProperties>
|
|
|
|
table_properties_from_reader;
|
|
|
|
st = reader.ReadTableProperties(&table_properties_from_reader);
|
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
|
|
|
fprintf(stderr, "Try to use initial table properties\n");
|
|
|
|
table_properties = reader.GetInitTableProperties();
|
|
|
|
} else {
|
|
|
|
table_properties = table_properties_from_reader.get();
|
|
|
|
}
|
|
|
|
if (table_properties != nullptr) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"Table Properties:\n"
|
|
|
|
"------------------------------\n"
|
|
|
|
" %s",
|
|
|
|
table_properties->ToString("\n ", ": ").c_str());
|
2014-11-08 20:35:10 +01:00
|
|
|
fprintf(stdout, "# deleted keys: %" PRIu64 "\n",
|
2014-11-08 02:23:58 +01:00
|
|
|
rocksdb::GetDeletedKeys(
|
|
|
|
table_properties->user_collected_properties));
|
2016-05-19 23:24:48 +02:00
|
|
|
|
|
|
|
bool property_present;
|
|
|
|
uint64_t merge_operands = rocksdb::GetMergeOperands(
|
|
|
|
table_properties->user_collected_properties, &property_present);
|
|
|
|
if (property_present) {
|
|
|
|
fprintf(stdout, " # merge operands: %" PRIu64 "\n", merge_operands);
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, " # merge operands: UNKNOWN\n");
|
|
|
|
}
|
2014-11-08 02:23:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-12-23 22:24:07 +01:00
|
|
|
return 0;
|
2014-11-08 02:23:58 +01:00
|
|
|
}
|
|
|
|
} // namespace rocksdb
|
2014-11-13 20:39:30 +01:00
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|