Follow-up Cleaning-up After D13521

Summary:
This patch is to address @haobo's comments on D13521:
1. rename Table to be TableReader and make its factory function to be GetTableReader
2. move the compression type selection logic out of TableBuilder but to compaction logic
3. more accurate comments
4. Move stat name constants into BlockBasedTable implementation.
5. remove some uncleaned codes in simple_table_db_test

Test Plan: pass test suites.

Reviewers: haobo, dhruba, kailiu

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13785
This commit is contained in:
Siying Dong 2013-10-30 10:52:33 -07:00
parent 068a819ac9
commit f03b2df010
19 changed files with 349 additions and 654 deletions

View File

@ -18,6 +18,7 @@
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "table/block_based_table_builder.h" #include "table/block_based_table_builder.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
@ -26,9 +27,9 @@ namespace rocksdb {
class TableFactory; class TableFactory;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) { CompressionType compression_type) {
return options.table_factory->GetTableBuilder(options, file, level, return options.table_factory->GetTableBuilder(options, file,
enable_compression); compression_type);
} }
Status BuildTable(const std::string& dbname, Status BuildTable(const std::string& dbname,
@ -63,8 +64,8 @@ Status BuildTable(const std::string& dbname,
return s; return s;
} }
TableBuilder* builder = GetTableBuilder(options, file.get(), 0, TableBuilder* builder = GetTableBuilder(options, file.get(),
enable_compression); options.compression);
// the first key is the smallest key // the first key is the smallest key
Slice key = iter->key(); Slice key = iter->key();

View File

@ -9,6 +9,7 @@
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/types.h" #include "rocksdb/types.h"
#include "rocksdb/options.h"
namespace rocksdb { namespace rocksdb {
@ -23,8 +24,9 @@ class VersionEdit;
class TableBuilder; class TableBuilder;
class WritableFile; class WritableFile;
extern TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, extern TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression); CompressionType compression_type);
// Build a Table file from the contents of *iter. The generated file // Build a Table file from the contents of *iter. The generated file
// will be named according to meta->number. On success, the rest of // will be named according to meta->number. On success, the rest of

View File

@ -211,6 +211,27 @@ Options SanitizeOptions(const std::string& dbname,
return result; return result;
} }
CompressionType GetCompressionType(const Options& options, int level,
const bool enable_compression) {
if (!enable_compression) {
// disable compression
return kNoCompression;
}
// If the use has specified a different compression level for each level,
// then pick the compresison for that level.
if (!options.compression_per_level.empty()) {
const int n = options.compression_per_level.size() - 1;
// It is possible for level_ to be -1; in that case, we use level
// 0's compression. This occurs mostly in backwards compatibility
// situations when the builder doesn't know what level the file
// belongs to. Likewise, if level_ is beyond the end of the
// specified compression levels, use the last value.
return options.compression_per_level[std::max(0, std::min(level, n))];
} else {
return options.compression;
}
}
DBImpl::DBImpl(const Options& options, const std::string& dbname) DBImpl::DBImpl(const Options& options, const std::string& dbname)
: env_(options.env), : env_(options.env),
dbname_(dbname), dbname_(dbname),
@ -1774,10 +1795,12 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
compact->outfile->SetPreallocationBlockSize( compact->outfile->SetPreallocationBlockSize(
1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level())); 1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level()));
CompressionType compression_type = GetCompressionType(
options_, compact->compaction->output_level(),
compact->compaction->enable_compression());
compact->builder.reset( compact->builder.reset(
GetTableBuilder(options_, compact->outfile.get(), GetTableBuilder(options_, compact->outfile.get(), compression_type));
compact->compaction->output_level(),
compact->compaction->enable_compression()));
} }
return s; return s;
} }

View File

@ -444,4 +444,13 @@ extern Options SanitizeOptions(const std::string& db,
const InternalFilterPolicy* ipolicy, const InternalFilterPolicy* ipolicy,
const Options& src); const Options& src);
// Determine compression type, based on user options, level of the output
// file and whether compression is disabled.
// If enable_compression is false, then compression is always disabled no
// matter what the values of the other two parameters are.
// Otherwise, the compression type is determined based on options and level.
CompressionType GetCompressionType(const Options& options, int level,
const bool enable_compression);
} // namespace rocksdb } // namespace rocksdb

View File

@ -60,10 +60,9 @@ namespace rocksdb {
// | index_block_offset (8 bytes) | // | index_block_offset (8 bytes) |
// +------------------------------+ // +------------------------------+
// SimpleTable is a simple table format for UNIT TEST ONLY. It is not built // SimpleTable is a simple table format for UNIT TEST ONLY. It is not built
// as production quality. // as production quality.
class SimpleTable : public Table { class SimpleTableReader: public TableReader {
public: public:
// Attempt to open the table that is stored in bytes [0..file_size) // Attempt to open the table that is stored in bytes [0..file_size)
// of "file", and read the metadata entries necessary to allow // of "file", and read the metadata entries necessary to allow
@ -77,19 +76,16 @@ class SimpleTable : public Table {
// for the duration of the returned table's lifetime. // for the duration of the returned table's lifetime.
// //
// *file must remain live while this Table is in use. // *file must remain live while this Table is in use.
static Status Open(const Options& options, static Status Open(const Options& options, const EnvOptions& soptions,
const EnvOptions& soptions, unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<RandomAccessFile>&& file, unique_ptr<TableReader>* table_reader);
uint64_t file_size,
unique_ptr<Table>* table);
bool PrefixMayMatch(const Slice& internal_prefix) override; bool PrefixMayMatch(const Slice& internal_prefix) override;
Iterator* NewIterator(const ReadOptions&) override; Iterator* NewIterator(const ReadOptions&) override;
Status Get( Status Get(
const ReadOptions&, const Slice& key, const ReadOptions&, const Slice& key, void* arg,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool), bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) override; void (*mark_key_may_exist)(void*) = nullptr) override;
@ -101,13 +97,13 @@ class SimpleTable : public Table {
TableStats& GetTableStats() override; TableStats& GetTableStats() override;
~SimpleTable(); ~SimpleTableReader();
private: private:
struct Rep; struct Rep;
Rep* rep_; Rep* rep_;
explicit SimpleTable(Rep* rep) { explicit SimpleTableReader(Rep* rep) {
rep_ = rep; rep_ = rep;
} }
friend class TableCache; friend class TableCache;
@ -116,14 +112,14 @@ class SimpleTable : public Table {
Status GetOffset(const Slice& target, uint64_t* offset); Status GetOffset(const Slice& target, uint64_t* offset);
// No copying allowed // No copying allowed
explicit SimpleTable(const Table&) = delete; explicit SimpleTableReader(const TableReader&) = delete;
void operator=(const Table&) = delete; void operator=(const TableReader&) = delete;
}; };
// Iterator to iterate SimpleTable // Iterator to iterate SimpleTable
class SimpleTableIterator: public Iterator { class SimpleTableIterator: public Iterator {
public: public:
explicit SimpleTableIterator(SimpleTable* table); explicit SimpleTableIterator(SimpleTableReader* table);
~SimpleTableIterator(); ~SimpleTableIterator();
bool Valid() const; bool Valid() const;
@ -145,7 +141,7 @@ public:
Status status() const; Status status() const;
private: private:
SimpleTable* table_; SimpleTableReader* table_;
uint64_t offset_; uint64_t offset_;
uint64_t next_offset_; uint64_t next_offset_;
Slice key_; Slice key_;
@ -160,7 +156,7 @@ private:
void operator=(const Iterator&) = delete; void operator=(const Iterator&) = delete;
}; };
struct SimpleTable::Rep { struct SimpleTableReader::Rep {
~Rep() { ~Rep() {
} }
Rep(const EnvOptions& storage_options, uint64_t index_start_offset, Rep(const EnvOptions& storage_options, uint64_t index_start_offset,
@ -186,13 +182,15 @@ struct SimpleTable::Rep {
} }
}; };
SimpleTable::~SimpleTable() { SimpleTableReader::~SimpleTableReader() {
delete rep_; delete rep_;
} }
Status SimpleTable::Open(const Options& options, const EnvOptions& soptions, Status SimpleTableReader::Open(const Options& options,
unique_ptr<RandomAccessFile> && file, uint64_t size, const EnvOptions& soptions,
unique_ptr<Table>* table) { unique_ptr<RandomAccessFile> && file,
uint64_t size,
unique_ptr<TableReader>* table_reader) {
char footer_space[Rep::offset_length]; char footer_space[Rep::offset_length];
Slice footer_input; Slice footer_input;
Status s = file->Read(size - Rep::offset_length, Rep::offset_length, Status s = file->Read(size - Rep::offset_length, Rep::offset_length,
@ -202,33 +200,33 @@ Status SimpleTable::Open(const Options& options, const EnvOptions& soptions,
int num_entries = (size - Rep::offset_length - index_start_offset) int num_entries = (size - Rep::offset_length - index_start_offset)
/ (Rep::GetInternalKeyLength() + Rep::offset_length); / (Rep::GetInternalKeyLength() + Rep::offset_length);
SimpleTable::Rep* rep = new SimpleTable::Rep(soptions, index_start_offset, SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(soptions,
index_start_offset,
num_entries); num_entries);
rep->file = std::move(file); rep->file = std::move(file);
rep->options = options; rep->options = options;
table->reset(new SimpleTable(rep)); table_reader->reset(new SimpleTableReader(rep));
} }
return s; return s;
} }
void SimpleTable::SetupForCompaction() { void SimpleTableReader::SetupForCompaction() {
} }
TableStats& SimpleTable::GetTableStats() { TableStats& SimpleTableReader::GetTableStats() {
return rep_->table_stats; return rep_->table_stats;
} }
bool SimpleTable::PrefixMayMatch(const Slice& internal_prefix) { bool SimpleTableReader::PrefixMayMatch(const Slice& internal_prefix) {
return true; return true;
} }
Iterator* SimpleTable::NewIterator(const ReadOptions& options) { Iterator* SimpleTableReader::NewIterator(const ReadOptions& options) {
return new SimpleTableIterator(this); return new SimpleTableIterator(this);
} }
Status SimpleTable::GetOffset(const Slice& target, uint64_t* offset) { Status SimpleTableReader::GetOffset(const Slice& target, uint64_t* offset) {
uint32_t left = 0; uint32_t left = 0;
uint32_t right = rep_->num_entries - 1; uint32_t right = rep_->num_entries - 1;
char key_chars[Rep::GetInternalKeyLength()]; char key_chars[Rep::GetInternalKeyLength()];
@ -281,7 +279,8 @@ Status SimpleTable::GetOffset(const Slice& target, uint64_t* offset) {
return s; return s;
} }
Status SimpleTable::Get(const ReadOptions& options, const Slice& k, void* arg, Status SimpleTableReader::Get(
const ReadOptions& options, const Slice& k, void* arg,
bool (*saver)(void*, const Slice&, const Slice&, bool), bool (*saver)(void*, const Slice&, const Slice&, bool),
void (*mark_key_may_exist)(void*)) { void (*mark_key_may_exist)(void*)) {
Status s; Status s;
@ -296,18 +295,18 @@ Status SimpleTable::Get(const ReadOptions& options, const Slice& k, void* arg,
return s; return s;
} }
bool SimpleTable::TEST_KeyInCache(const ReadOptions& options, bool SimpleTableReader::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) { const Slice& key) {
return false; return false;
} }
uint64_t SimpleTable::ApproximateOffsetOf(const Slice& key) { uint64_t SimpleTableReader::ApproximateOffsetOf(const Slice& key) {
return 0; return 0;
} }
SimpleTableIterator::SimpleTableIterator(SimpleTable* table) : SimpleTableIterator::SimpleTableIterator(SimpleTableReader* table) :
table_(table) { table_(table) {
key_str_ = new char[table->rep_->GetInternalKeyLength()]; key_str_ = new char[SimpleTableReader::Rep::GetInternalKeyLength()];
value_str_len_ = -1; value_str_len_ = -1;
SeekToFirst(); SeekToFirst();
} }
@ -346,7 +345,7 @@ void SimpleTableIterator::Next() {
return; return;
} }
Slice result; Slice result;
int internal_key_size = table_->rep_->GetInternalKeyLength(); int internal_key_size = SimpleTableReader::Rep::GetInternalKeyLength();
Status s = table_->rep_->file->Read(next_offset_, internal_key_size, &result, Status s = table_->rep_->file->Read(next_offset_, internal_key_size, &result,
key_str_); key_str_);
@ -396,7 +395,8 @@ class SimpleTableBuilder : public TableBuilder {
// caller to close the file after calling Finish(). The output file // caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means // will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside. // that the caller does not know which level the output file will reside.
SimpleTableBuilder(const Options& options, WritableFile* file, int level=-1); SimpleTableBuilder(const Options& options, WritableFile* file,
CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~SimpleTableBuilder(); ~SimpleTableBuilder();
@ -457,17 +457,17 @@ struct SimpleTableBuilder::Rep {
std::string index; std::string index;
Rep(const Options& opt, WritableFile* f) Rep(const Options& opt, WritableFile* f) :
: options(opt), options(opt), file(f) {
file(f) {
} }
~Rep() { ~Rep() {
} }
}; };
SimpleTableBuilder::SimpleTableBuilder(const Options& options, SimpleTableBuilder::SimpleTableBuilder(const Options& options,
WritableFile* file, int level) WritableFile* file,
: TableBuilder(level), rep_(new SimpleTableBuilder::Rep(options, file)) { CompressionType compression_type) :
rep_(new SimpleTableBuilder::Rep(options, file)) {
} }
SimpleTableBuilder::~SimpleTableBuilder() { SimpleTableBuilder::~SimpleTableBuilder() {
@ -533,202 +533,48 @@ uint64_t SimpleTableBuilder::FileSize() const {
class SimpleTableFactory: public TableFactory { class SimpleTableFactory: public TableFactory {
public: public:
~SimpleTableFactory() {} ~SimpleTableFactory() {
SimpleTableFactory() {} }
SimpleTableFactory() {
}
const char* Name() const override { const char* Name() const override {
return "SimpleTable"; return "SimpleTable";
} }
Status OpenTable(const Options& options, const EnvOptions& soptions, Status GetTableReader(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<Table>* table) const;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) const;
};
Status SimpleTableFactory::OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, unique_ptr<RandomAccessFile> && file,
uint64_t file_size, uint64_t file_size,
unique_ptr<Table>* table) const { unique_ptr<TableReader>* table_reader) const;
return SimpleTable::Open(options, soptions, std::move(file), file_size, TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
table); CompressionType compression_type) const;
};
Status SimpleTableFactory::GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return SimpleTableReader::Open(options, soptions, std::move(file), file_size,
table_reader);
} }
TableBuilder* SimpleTableFactory::GetTableBuilder( TableBuilder* SimpleTableFactory::GetTableBuilder(
const Options& options, WritableFile* file, int level, const Options& options, WritableFile* file,
const bool enable_compression) const { CompressionType compression_type) const {
return new SimpleTableBuilder(options, file, level); return new SimpleTableBuilder(options, file, compression_type);
} }
namespace anon {
class AtomicCounter {
private:
port::Mutex mu_;
int count_;
public:
AtomicCounter() : count_(0) { }
void Increment() {
MutexLock l(&mu_);
count_++;
}
int Read() {
MutexLock l(&mu_);
return count_;
}
void Reset() {
MutexLock l(&mu_);
count_ = 0;
}
};
}
// Special Env used to delay background operations
class SpecialEnv : public EnvWrapper {
public:
// sstable Sync() calls are blocked while this pointer is non-nullptr.
port::AtomicPointer delay_sstable_sync_;
// Simulate no-space errors while this pointer is non-nullptr.
port::AtomicPointer no_space_;
// Simulate non-writable file system while this pointer is non-nullptr
port::AtomicPointer non_writable_;
// Force sync of manifest files to fail while this pointer is non-nullptr
port::AtomicPointer manifest_sync_error_;
// Force write to manifest files to fail while this pointer is non-nullptr
port::AtomicPointer manifest_write_error_;
bool count_random_reads_;
anon::AtomicCounter random_read_counter_;
anon::AtomicCounter sleep_counter_;
explicit SpecialEnv(Env* base) : EnvWrapper(base) {
delay_sstable_sync_.Release_Store(nullptr);
no_space_.Release_Store(nullptr);
non_writable_.Release_Store(nullptr);
count_random_reads_ = false;
manifest_sync_error_.Release_Store(nullptr);
manifest_write_error_.Release_Store(nullptr);
}
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
const EnvOptions& soptions) {
class SSTableFile : public WritableFile {
private:
SpecialEnv* env_;
unique_ptr<WritableFile> base_;
public:
SSTableFile(SpecialEnv* env, unique_ptr<WritableFile>&& base)
: env_(env),
base_(std::move(base)) {
}
Status Append(const Slice& data) {
if (env_->no_space_.Acquire_Load() != nullptr) {
// Drop writes on the floor
return Status::OK();
} else {
return base_->Append(data);
}
}
Status Close() { return base_->Close(); }
Status Flush() { return base_->Flush(); }
Status Sync() {
while (env_->delay_sstable_sync_.Acquire_Load() != nullptr) {
env_->SleepForMicroseconds(100000);
}
return base_->Sync();
}
};
class ManifestFile : public WritableFile {
private:
SpecialEnv* env_;
unique_ptr<WritableFile> base_;
public:
ManifestFile(SpecialEnv* env, unique_ptr<WritableFile>&& b)
: env_(env), base_(std::move(b)) { }
Status Append(const Slice& data) {
if (env_->manifest_write_error_.Acquire_Load() != nullptr) {
return Status::IOError("simulated writer error");
} else {
return base_->Append(data);
}
}
Status Close() { return base_->Close(); }
Status Flush() { return base_->Flush(); }
Status Sync() {
if (env_->manifest_sync_error_.Acquire_Load() != nullptr) {
return Status::IOError("simulated sync error");
} else {
return base_->Sync();
}
}
};
if (non_writable_.Acquire_Load() != nullptr) {
return Status::IOError("simulated write error");
}
Status s = target()->NewWritableFile(f, r, soptions);
if (s.ok()) {
if (strstr(f.c_str(), ".sst") != nullptr) {
r->reset(new SSTableFile(this, std::move(*r)));
} else if (strstr(f.c_str(), "MANIFEST") != nullptr) {
r->reset(new ManifestFile(this, std::move(*r)));
}
}
return s;
}
Status NewRandomAccessFile(const std::string& f,
unique_ptr<RandomAccessFile>* r,
const EnvOptions& soptions) {
class CountingFile : public RandomAccessFile {
private:
unique_ptr<RandomAccessFile> target_;
anon::AtomicCounter* counter_;
public:
CountingFile(unique_ptr<RandomAccessFile>&& target,
anon::AtomicCounter* counter)
: target_(std::move(target)), counter_(counter) {
}
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
counter_->Increment();
return target_->Read(offset, n, result, scratch);
}
};
Status s = target()->NewRandomAccessFile(f, r, soptions);
if (s.ok() && count_random_reads_) {
r->reset(new CountingFile(std::move(*r), &random_read_counter_));
}
return s;
}
virtual void SleepForMicroseconds(int micros) {
sleep_counter_.Increment();
target()->SleepForMicroseconds(micros);
}
};
class SimpleTableDBTest { class SimpleTableDBTest {
protected: protected:
public: public:
std::string dbname_; std::string dbname_;
SpecialEnv* env_; Env* env_;
DB* db_; DB* db_;
Options last_options_; Options last_options_;
SimpleTableDBTest() : env_(new SpecialEnv(Env::Default())) { SimpleTableDBTest() :
env_(Env::Default()) {
dbname_ = test::TmpDir() + "/simple_table_db_test"; dbname_ = test::TmpDir() + "/simple_table_db_test";
ASSERT_OK(DestroyDB(dbname_, Options())); ASSERT_OK(DestroyDB(dbname_, Options()));
db_ = nullptr; db_ = nullptr;
@ -738,7 +584,6 @@ class SimpleTableDBTest {
~SimpleTableDBTest() { ~SimpleTableDBTest() {
delete db_; delete db_;
ASSERT_OK(DestroyDB(dbname_, Options())); ASSERT_OK(DestroyDB(dbname_, Options()));
delete env_;
} }
// Return the current option configuration. // Return the current option configuration.
@ -813,81 +658,6 @@ class SimpleTableDBTest {
return result; return result;
} }
// Return a string that contains all key,value pairs in order,
// formatted like "(k1->v1)(k2->v2)".
std::string Contents() {
std::vector<std::string> forward;
std::string result;
Iterator* iter = db_->NewIterator(ReadOptions());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
std::string s = IterStatus(iter);
result.push_back('(');
result.append(s);
result.push_back(')');
forward.push_back(s);
}
// Check reverse iteration results are the reverse of forward results
unsigned int matched = 0;
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_LT(matched, forward.size());
ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
matched++;
}
ASSERT_EQ(matched, forward.size());
delete iter;
return result;
}
std::string AllEntriesFor(const Slice& user_key) {
Iterator* iter = dbfull()->TEST_NewInternalIterator();
InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
iter->Seek(target.Encode());
std::string result;
if (!iter->status().ok()) {
result = iter->status().ToString();
} else {
result = "[ ";
bool first = true;
while (iter->Valid()) {
ParsedInternalKey ikey;
if (!ParseInternalKey(iter->key(), &ikey)) {
result += "CORRUPTED";
} else {
if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
break;
}
if (!first) {
result += ", ";
}
first = false;
switch (ikey.type) {
case kTypeValue:
result += iter->value().ToString();
break;
case kTypeMerge:
// keep it the same as kTypeValue for testing kMergePut
result += iter->value().ToString();
break;
case kTypeDeletion:
result += "DEL";
break;
case kTypeLogData:
assert(false);
break;
}
}
iter->Next();
}
if (!first) {
result += " ";
}
result += "]";
}
delete iter;
return result;
}
int NumTableFilesAtLevel(int level) { int NumTableFilesAtLevel(int level) {
std::string property; std::string property;
@ -897,14 +667,6 @@ class SimpleTableDBTest {
return atoi(property.c_str()); return atoi(property.c_str());
} }
int TotalTableFiles() {
int result = 0;
for (int level = 0; level < db_->NumberLevels(); level++) {
result += NumTableFilesAtLevel(level);
}
return result;
}
// Return spread of files per level // Return spread of files per level
std::string FilesPerLevel() { std::string FilesPerLevel() {
std::string result; std::string result;
@ -922,71 +684,6 @@ class SimpleTableDBTest {
return result; return result;
} }
int CountFiles() {
std::vector<std::string> files;
env_->GetChildren(dbname_, &files);
std::vector<std::string> logfiles;
if (dbname_ != last_options_.wal_dir) {
env_->GetChildren(last_options_.wal_dir, &logfiles);
}
return static_cast<int>(files.size() + logfiles.size());
}
int CountLiveFiles() {
std::vector<std::string> files;
uint64_t manifest_file_size;
db_->GetLiveFiles(files, &manifest_file_size);
return files.size();
}
uint64_t Size(const Slice& start, const Slice& limit) {
Range r(start, limit);
uint64_t size;
db_->GetApproximateSizes(&r, 1, &size);
return size;
}
void Compact(const Slice& start, const Slice& limit) {
db_->CompactRange(&start, &limit);
}
// Do n memtable compactions, each of which produces an sstable
// covering the range [small,large].
void MakeTables(int n, const std::string& small, const std::string& large) {
for (int i = 0; i < n; i++) {
Put(small, "begin");
Put(large, "end");
dbfull()->TEST_FlushMemTable();
}
}
// Prevent pushing of new sstables into deeper levels by adding
// tables that cover a specified range to all levels.
void FillLevels(const std::string& smallest, const std::string& largest) {
MakeTables(db_->NumberLevels(), smallest, largest);
}
void DumpFileCounts(const char* label) {
fprintf(stderr, "---\n%s:\n", label);
fprintf(stderr, "maxoverlap: %lld\n",
static_cast<long long>(
dbfull()->TEST_MaxNextLevelOverlappingBytes()));
for (int level = 0; level < db_->NumberLevels(); level++) {
int num = NumTableFilesAtLevel(level);
if (num > 0) {
fprintf(stderr, " level %3d : %d files\n", level, num);
}
}
}
std::string DumpSSTableList() {
std::string property;
db_->GetProperty("rocksdb.sstables", &property);
return property;
}
std::string IterStatus(Iterator* iter) { std::string IterStatus(Iterator* iter) {
std::string result; std::string result;
if (iter->Valid()) { if (iter->Valid()) {
@ -996,26 +693,6 @@ class SimpleTableDBTest {
} }
return result; return result;
} }
Options OptionsForLogIterTest() {
Options options = CurrentOptions();
options.create_if_missing = true;
options.WAL_ttl_seconds = 1000;
return options;
}
std::unique_ptr<TransactionLogIterator> OpenTransactionLogIter(
const SequenceNumber seq) {
unique_ptr<TransactionLogIterator> iter;
Status status = dbfull()->GetUpdatesSince(seq, &iter);
ASSERT_OK(status);
ASSERT_TRUE(iter->Valid());
return std::move(iter);
}
std::string DummyString(size_t len, char c = 'a') {
return std::string(len, c);
}
}; };
TEST(SimpleTableDBTest, Empty) { TEST(SimpleTableDBTest, Empty) {
@ -1085,8 +762,7 @@ TEST(SimpleTableDBTest, CompactionTrigger) {
Random rnd(301); Random rnd(301);
for (int num = 0; for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
num < options.level0_file_num_compaction_trigger - 1;
num++) { num++) {
std::vector<std::string> values; std::vector<std::string> values;
// Write 120KB (12 values, each 10K) // Write 120KB (12 values, each 10K)

View File

@ -19,8 +19,8 @@
namespace rocksdb { namespace rocksdb {
static void DeleteEntry(const Slice& key, void* value) { static void DeleteEntry(const Slice& key, void* value) {
Table* table = reinterpret_cast<Table*>(value); TableReader* table_reader = reinterpret_cast<TableReader*>(value);
delete table; delete table_reader;
} }
static void UnrefEntry(void* arg1, void* arg2) { static void UnrefEntry(void* arg1, void* arg2) {
@ -63,7 +63,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
} }
std::string fname = TableFileName(dbname_, file_number); std::string fname = TableFileName(dbname_, file_number);
unique_ptr<RandomAccessFile> file; unique_ptr<RandomAccessFile> file;
unique_ptr<Table> table; unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions); s = env_->NewRandomAccessFile(fname, &file, toptions);
RecordTick(options_->statistics, NO_FILE_OPENS); RecordTick(options_->statistics, NO_FILE_OPENS);
if (s.ok()) { if (s.ok()) {
@ -71,19 +71,19 @@ Status TableCache::FindTable(const EnvOptions& toptions,
file->Hint(RandomAccessFile::RANDOM); file->Hint(RandomAccessFile::RANDOM);
} }
StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS); StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS);
s = options_->table_factory->OpenTable(*options_, toptions, s = options_->table_factory->GetTableReader(*options_, toptions,
std::move(file), std::move(file), file_size,
file_size, &table); &table_reader);
} }
if (!s.ok()) { if (!s.ok()) {
assert(table == nullptr); assert(table_reader == nullptr);
RecordTick(options_->statistics, NO_FILE_ERRORS); RecordTick(options_->statistics, NO_FILE_ERRORS);
// We do not cache error results so that if the error is transient, // We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically. // or somebody repairs the file, we recover automatically.
} else { } else {
assert(file.get() == nullptr); assert(file.get() == nullptr);
*handle = cache_->Insert(key, table.release(), 1, &DeleteEntry); *handle = cache_->Insert(key, table_reader.release(), 1, &DeleteEntry);
} }
} }
return s; return s;
@ -93,10 +93,10 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions, const EnvOptions& toptions,
uint64_t file_number, uint64_t file_number,
uint64_t file_size, uint64_t file_size,
Table** tableptr, TableReader** table_reader_ptr,
bool for_compaction) { bool for_compaction) {
if (tableptr != nullptr) { if (table_reader_ptr != nullptr) {
*tableptr = nullptr; *table_reader_ptr = nullptr;
} }
Cache::Handle* handle = nullptr; Cache::Handle* handle = nullptr;
@ -106,16 +106,16 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
return NewErrorIterator(s); return NewErrorIterator(s);
} }
Table* table = TableReader* table_reader =
reinterpret_cast<Table*>(cache_->Value(handle)); reinterpret_cast<TableReader*>(cache_->Value(handle));
Iterator* result = table->NewIterator(options); Iterator* result = table_reader->NewIterator(options);
result->RegisterCleanup(&UnrefEntry, cache_.get(), handle); result->RegisterCleanup(&UnrefEntry, cache_.get(), handle);
if (tableptr != nullptr) { if (table_reader_ptr != nullptr) {
*tableptr = table; *table_reader_ptr = table_reader;
} }
if (for_compaction) { if (for_compaction) {
table->SetupForCompaction(); table_reader->SetupForCompaction();
} }
return result; return result;
@ -134,8 +134,8 @@ Status TableCache::Get(const ReadOptions& options,
&handle, table_io, &handle, table_io,
options.read_tier == kBlockCacheTier); options.read_tier == kBlockCacheTier);
if (s.ok()) { if (s.ok()) {
Table* t = TableReader* t =
reinterpret_cast<Table*>(cache_->Value(handle)); reinterpret_cast<TableReader*>(cache_->Value(handle));
s = t->Get(options, k, arg, saver, mark_key_may_exist); s = t->Get(options, k, arg, saver, mark_key_may_exist);
cache_->Release(handle); cache_->Release(handle);
} else if (options.read_tier && s.IsIncomplete()) { } else if (options.read_tier && s.IsIncomplete()) {
@ -156,8 +156,8 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options,
file_size, &handle, table_io); file_size, &handle, table_io);
bool may_match = true; bool may_match = true;
if (s.ok()) { if (s.ok()) {
Table* t = TableReader* t =
reinterpret_cast<Table*>(cache_->Value(handle)); reinterpret_cast<TableReader*>(cache_->Value(handle));
may_match = t->PrefixMayMatch(internal_prefix); may_match = t->PrefixMayMatch(internal_prefix);
cache_->Release(handle); cache_->Release(handle);
} }

View File

@ -39,7 +39,7 @@ class TableCache {
const EnvOptions& toptions, const EnvOptions& toptions,
uint64_t file_number, uint64_t file_number,
uint64_t file_size, uint64_t file_size,
Table** tableptr = nullptr, TableReader** table_reader_ptr = nullptr,
bool for_compaction = false); bool for_compaction = false);
// If a seek to internal key "k" in specified file finds an entry, // If a seek to internal key "k" in specified file finds an entry,

View File

@ -20,7 +20,7 @@ namespace rocksdb {
class TableStatsTest { class TableStatsTest {
private: private:
unique_ptr<Table> table_; unique_ptr<TableReader> table_reader_;
}; };
// TODO(kailiu) the following classes should be moved to some more general // TODO(kailiu) the following classes should be moved to some more general
@ -88,22 +88,21 @@ void MakeBuilder(
std::unique_ptr<TableBuilder>* builder) { std::unique_ptr<TableBuilder>* builder) {
writable->reset(new FakeWritableFile); writable->reset(new FakeWritableFile);
builder->reset( builder->reset(
options.table_factory->GetTableBuilder(options, writable->get(), 0, options.table_factory->GetTableBuilder(options, writable->get(),
true) options.compression));
);
} }
void OpenTable( void OpenTable(
const Options& options, const Options& options,
const std::string& contents, const std::string& contents,
std::unique_ptr<Table>* table) { std::unique_ptr<TableReader>* table_reader) {
std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents)); std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents));
auto s = options.table_factory->OpenTable( auto s = options.table_factory->GetTableReader(
options, options,
EnvOptions(), EnvOptions(),
std::move(file), std::move(file),
contents.size(), contents.size(),
table table_reader
); );
ASSERT_OK(s); ASSERT_OK(s);
} }
@ -176,9 +175,9 @@ TEST(TableStatsTest, CustomizedTableStatsCollector) {
ASSERT_OK(builder->Finish()); ASSERT_OK(builder->Finish());
// -- Step 2: Open table // -- Step 2: Open table
std::unique_ptr<Table> table; std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table); OpenTable(options, writable->contents(), &table_reader);
const auto& stats = table->GetTableStats().user_collected_stats; const auto& stats = table_reader->GetTableStats().user_collected_stats;
ASSERT_EQ("Rocksdb", stats.at("TableStatsTest")); ASSERT_EQ("Rocksdb", stats.at("TableStatsTest"));
@ -234,9 +233,9 @@ TEST(TableStatsTest, InternalKeyStatsCollector) {
ASSERT_OK(builder->Finish()); ASSERT_OK(builder->Finish());
std::unique_ptr<Table> table; std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table); OpenTable(options, writable->contents(), &table_reader);
const auto& stats = table->GetTableStats().user_collected_stats; const auto& stats = table_reader->GetTableStats().user_collected_stats;
uint64_t deleted = 0; uint64_t deleted = 0;
Slice key(stats.at(InternalKeyTableStatsNames::kDeletedKeys)); Slice key(stats.at(InternalKeyTableStatsNames::kDeletedKeys));

View File

@ -1920,12 +1920,12 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
} else { } else {
// "ikey" falls in the range for this table. Add the // "ikey" falls in the range for this table. Add the
// approximate offset of "ikey" within the table. // approximate offset of "ikey" within the table.
Table* tableptr; TableReader* table_reader_ptr;
Iterator* iter = table_cache_->NewIterator( Iterator* iter = table_cache_->NewIterator(
ReadOptions(), storage_options_, files[i]->number, ReadOptions(), storage_options_, files[i]->number,
files[i]->file_size, &tableptr); files[i]->file_size, &table_reader_ptr);
if (tableptr != nullptr) { if (table_reader_ptr != nullptr) {
result += tableptr->ApproximateOffsetOf(ikey.Encode()); result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode());
} }
delete iter; delete iter;
} }

View File

@ -19,7 +19,6 @@
#include "rocksdb/universal_compaction.h" #include "rocksdb/universal_compaction.h"
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
namespace rocksdb { namespace rocksdb {

View File

@ -13,6 +13,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table_stats.h" #include "rocksdb/table_stats.h"
#include "rocksdb/options.h"
namespace rocksdb { namespace rocksdb {
@ -31,22 +32,8 @@ using std::unique_ptr;
// external synchronization, but if any of the threads may call a // external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same TableBuilder must use // non-const method, all threads accessing the same TableBuilder must use
// external synchronization. // external synchronization.
class TableBuilder { class TableBuilder {
public: public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
//
// If enable_compression=true, this table will follow the compression
// setting given in parameter options. If enable_compression=false, the
// table will not be compressed.
explicit TableBuilder(int level = -1, const bool enable_compression = true) :
level_(level) {
}
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
virtual ~TableBuilder() {} virtual ~TableBuilder() {}
@ -74,17 +61,14 @@ class TableBuilder {
// Size of the file generated so far. If invoked after a successful // Size of the file generated so far. If invoked after a successful
// Finish() call, returns the size of the final generated file. // Finish() call, returns the size of the final generated file.
virtual uint64_t FileSize() const = 0; virtual uint64_t FileSize() const = 0;
protected:
int level_;
}; };
// A Table is a sorted map from strings to strings. Tables are // A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from // immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization. // multiple threads without external synchronization.
class Table { class TableReader {
public: public:
virtual ~Table() {} virtual ~TableReader() {}
// Determine whether there is a chance that the current table file // Determine whether there is a chance that the current table file
// contains the key a key starting with iternal_prefix. The specific // contains the key a key starting with iternal_prefix. The specific
@ -116,29 +100,25 @@ class Table {
virtual TableStats& GetTableStats() = 0; virtual TableStats& GetTableStats() = 0;
// Get function issued to look for specific key. // Calls (*result_handler)(handle_context, ...) repeatedly, starting with
// The table will search the first entry in the table whose user key // the entry found after a call to Seek(key), until result_handler returns
// matches key, and pass it to the call back function handle_result, // false, where k is the actual internal key for a row found and v as the
// with the first argument to be parameter arg, and the last bool // value of the key. didIO is true if I/O is involved in the operation. May
// parameter to be whether an I/O is issued. // not make such a call if filter policy says that key is not present.
// mark_key_may_exist call back is called when it is configured to be //
// mark_key_may_exist_handler needs to be called when it is configured to be
// memory only and the key is not found in the block cache, with // memory only and the key is not found in the block cache, with
// the parameter to be arg. // the parameter to be handle_context.
//
// readOptions is the options for the read
// key is the key to search for
virtual Status Get( virtual Status Get(
const ReadOptions&, const Slice& key, const ReadOptions& readOptions,
void* arg, const Slice& key,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool), void* handle_context,
void (*mark_key_may_exist)(void*) = nullptr) = 0; bool (*result_handler)(void* handle_context, const Slice& k,
}; const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr) = 0;
struct TableStatsNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
}; };
// A base class for table factories // A base class for table factories
@ -146,7 +126,7 @@ class TableFactory {
public: public:
virtual ~TableFactory() {} virtual ~TableFactory() {}
// The name of the comparator. // The type of the table.
// //
// The client of this package should switch to a new name whenever // The client of this package should switch to a new name whenever
// the table format implementation changes. // the table format implementation changes.
@ -159,16 +139,21 @@ class TableFactory {
// in parameter file. It's the caller's responsibility to make sure // in parameter file. It's the caller's responsibility to make sure
// file is in the correct format. // file is in the correct format.
// //
// OpenTable() is called in two places: // GetTableReader() is called in two places:
// (1) TableCache::FindTable() calls the function when table cache miss // (1) TableCache::FindTable() calls the function when table cache miss
// and cache the table object returned. // and cache the table object returned.
// (1) SstFileReader (for SST Dump) opens the table and dump the table // (1) SstFileReader (for SST Dump) opens the table and dump the table
// contents using the interator of the table. // contents using the interator of the table.
virtual Status OpenTable(const Options& options, // options and soptions are options. options is the general options.
const EnvOptions& soptions, // Multiple configured can be accessed from there, including and not
unique_ptr<RandomAccessFile>&& file, // limited to block cache and key comparators.
uint64_t file_size, // file is a file handler to handle the file for the table
unique_ptr<Table>* table) const = 0; // file_size is the physical file size of the file
// table_reader is the output table reader
virtual Status GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const = 0;
// Return a table builder to write to a file for this table type. // Return a table builder to write to a file for this table type.
// //
@ -182,8 +167,14 @@ class TableFactory {
// by calling BuildTable()) // by calling BuildTable())
// (4) When running Repairer, it creates a table builder to convert logs to // (4) When running Repairer, it creates a table builder to convert logs to
// SST files (In Repairer::ConvertLogToTable() by calling BuildTable()) // SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
//
// options is the general options. Multiple configured can be acceseed from
// there, including and not limited to compression options.
// file is a handle of a writable file. It is the caller's responsibility to
// keep the file open and close the file after closing the table builder.
// compression_type is the compression type to use in this table.
virtual TableBuilder* GetTableBuilder( virtual TableBuilder* GetTableBuilder(
const Options& options, WritableFile* file, int level, const Options& options, WritableFile* file,
const bool enable_compression) const = 0; CompressionType compression_type) const = 0;
}; };
} // namespace rocksdb } // namespace rocksdb

View File

@ -17,7 +17,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "table/block_based_table.h" #include "table/block_based_table_reader.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/filter_block.h" #include "table/filter_block.h"
#include "table/format.h" #include "table/format.h"
@ -81,8 +81,7 @@ struct BlockBasedTableBuilder::Rep {
BlockBuilder data_block; BlockBuilder data_block;
BlockBuilder index_block; BlockBuilder index_block;
std::string last_key; std::string last_key;
// Whether enable compression in this table. CompressionType compression_type;
bool enable_compression;
uint64_t num_entries = 0; uint64_t num_entries = 0;
uint64_t num_data_blocks = 0; uint64_t num_data_blocks = 0;
@ -107,13 +106,13 @@ struct BlockBasedTableBuilder::Rep {
std::string compressed_output; std::string compressed_output;
Rep(const Options& opt, WritableFile* f, bool enable_compression) Rep(const Options& opt, WritableFile* f, CompressionType compression_type)
: options(opt), : options(opt),
index_block_options(opt), index_block_options(opt),
file(f), file(f),
data_block(&options), data_block(&options),
index_block(1, index_block_options.comparator), index_block(1, index_block_options.comparator),
enable_compression(enable_compression), compression_type(compression_type),
filter_block(opt.filter_policy == nullptr ? nullptr filter_block(opt.filter_policy == nullptr ? nullptr
: new FilterBlockBuilder(opt)), : new FilterBlockBuilder(opt)),
pending_index_entry(false) { pending_index_entry(false) {
@ -121,9 +120,9 @@ struct BlockBasedTableBuilder::Rep {
}; };
BlockBasedTableBuilder::BlockBasedTableBuilder(const Options& options, BlockBasedTableBuilder::BlockBasedTableBuilder(const Options& options,
WritableFile* file, int level, WritableFile* file,
const bool enable_compression) CompressionType compression_type)
: TableBuilder(level), rep_(new Rep(options, file, enable_compression)) { : rep_(new Rep(options, file, compression_type)) {
if (rep_->filter_block != nullptr) { if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0); rep_->filter_block->StartBlock(0);
} }
@ -220,26 +219,7 @@ void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block,
Slice block_contents; Slice block_contents;
std::string* compressed = &r->compressed_output; std::string* compressed = &r->compressed_output;
CompressionType type; CompressionType type = r->compression_type;
if (!r->enable_compression) {
// disable compression
type = kNoCompression;
} else {
// If the use has specified a different compression level for each level,
// then pick the compresison for that level.
if (!r->options.compression_per_level.empty()) {
const int n = r->options.compression_per_level.size();
// It is possible for level_ to be -1; in that case, we use level
// 0's compression. This occurs mostly in backwards compatibility
// situations when the builder doesn't know what level the file
// belongs to. Likewise, if level_ is beyond the end of the
// specified compression levels, use the last value.
type = r->options.compression_per_level[std::max(0,
std::min(level_, n))];
} else {
type = r->options.compression;
}
}
switch (type) { switch (type) {
case kNoCompression: case kNoCompression:
block_contents = raw; block_contents = raw;
@ -376,19 +356,21 @@ Status BlockBasedTableBuilder::Finish() {
BytewiseSortedMap stats; BytewiseSortedMap stats;
// Add basic stats // Add basic stats
AddStats(stats, TableStatsNames::kRawKeySize, r->raw_key_size); AddStats(stats, BlockBasedTableStatsNames::kRawKeySize, r->raw_key_size);
AddStats(stats, TableStatsNames::kRawValueSize, r->raw_value_size); AddStats(stats, BlockBasedTableStatsNames::kRawValueSize,
AddStats(stats, TableStatsNames::kDataSize, r->data_size); r->raw_value_size);
AddStats(stats, BlockBasedTableStatsNames::kDataSize, r->data_size);
AddStats( AddStats(
stats, stats,
TableStatsNames::kIndexSize, BlockBasedTableStatsNames::kIndexSize,
r->index_block.CurrentSizeEstimate() + kBlockTrailerSize r->index_block.CurrentSizeEstimate() + kBlockTrailerSize
); );
AddStats(stats, TableStatsNames::kNumEntries, r->num_entries); AddStats(stats, BlockBasedTableStatsNames::kNumEntries, r->num_entries);
AddStats(stats, TableStatsNames::kNumDataBlocks, r->num_data_blocks); AddStats(stats, BlockBasedTableStatsNames::kNumDataBlocks,
r->num_data_blocks);
if (r->filter_block != nullptr) { if (r->filter_block != nullptr) {
stats.insert(std::make_pair( stats.insert(std::make_pair(
TableStatsNames::kFilterPolicy, BlockBasedTableStatsNames::kFilterPolicy,
r->options.filter_policy->Name() r->options.filter_policy->Name()
)); ));
} }

View File

@ -11,24 +11,22 @@
#include <stdint.h> #include <stdint.h>
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/table.h"
namespace rocksdb { namespace rocksdb {
class BlockBuilder; class BlockBuilder;
class BlockHandle; class BlockHandle;
class WritableFile; class WritableFile;
class TableBuilder;
class BlockBasedTableBuilder : public TableBuilder { class BlockBasedTableBuilder : public TableBuilder {
public: public:
// Create a builder that will store the contents of the table it is // Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the // building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file // caller to close the file after calling Finish().
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
BlockBasedTableBuilder(const Options& options, WritableFile* file, BlockBasedTableBuilder(const Options& options, WritableFile* file,
int level = -1, const bool enable_compression = true); CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder(); ~BlockBasedTableBuilder();

View File

@ -13,24 +13,22 @@
#include <memory> #include <memory>
#include <stdint.h> #include <stdint.h>
#include "table/block_based_table_builder.h" #include "table/block_based_table_builder.h"
#include "table/block_based_table.h" #include "table/block_based_table_reader.h"
#include "port/port.h" #include "port/port.h"
namespace rocksdb { namespace rocksdb {
Status BlockBasedTableFactory::OpenTable(const Options& options, Status BlockBasedTableFactory::GetTableReader(
const EnvOptions& soptions, const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, unique_ptr<RandomAccessFile> && file, uint64_t file_size,
uint64_t file_size, unique_ptr<TableReader>* table_reader) const {
unique_ptr<Table>* table) const {
return BlockBasedTable::Open(options, soptions, std::move(file), file_size, return BlockBasedTable::Open(options, soptions, std::move(file), file_size,
table); table_reader);
} }
TableBuilder* BlockBasedTableFactory::GetTableBuilder( TableBuilder* BlockBasedTableFactory::GetTableBuilder(
const Options& options, WritableFile* file, int level, const Options& options, WritableFile* file,
const bool enable_compression) const { CompressionType compression_type) const {
return new BlockBasedTableBuilder(options, file, level, enable_compression); return new BlockBasedTableBuilder(options, file, compression_type);
} }
} // namespace rocksdb } // namespace rocksdb

View File

@ -36,12 +36,13 @@ public:
const char* Name() const override { const char* Name() const override {
return "BlockBasedTable"; return "BlockBasedTable";
} }
Status OpenTable(const Options& options, const EnvOptions& soptions, Status GetTableReader(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size, unique_ptr<RandomAccessFile> && file,
unique_ptr<Table>* table) const override; uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) const CompressionType compression_type) const
override; override;
}; };

View File

@ -7,7 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based_table.h" #include "table/block_based_table_reader.h"
#include "db/dbformat.h" #include "db/dbformat.h"
@ -113,8 +113,8 @@ Status BlockBasedTable::Open(const Options& options,
const EnvOptions& soptions, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, unique_ptr<RandomAccessFile> && file,
uint64_t size, uint64_t size,
unique_ptr<Table>* table) { unique_ptr<TableReader>* table_reader) {
table->reset(); table_reader->reset();
if (size < Footer::kEncodedLength) { if (size < Footer::kEncodedLength) {
return Status::InvalidArgument("file is too short to be an sstable"); return Status::InvalidArgument("file is too short to be an sstable");
} }
@ -151,8 +151,8 @@ Status BlockBasedTable::Open(const Options& options,
SetupCacheKeyPrefix(rep); SetupCacheKeyPrefix(rep);
rep->filter_data = nullptr; rep->filter_data = nullptr;
rep->filter = nullptr; rep->filter = nullptr;
table->reset(new BlockBasedTable(rep)); table_reader->reset(new BlockBasedTable(rep));
((BlockBasedTable*) (table->get()))->ReadMeta(footer); ((BlockBasedTable*) (table_reader->get()))->ReadMeta(footer);
} else { } else {
if (index_block) delete index_block; if (index_block) delete index_block;
} }
@ -275,12 +275,12 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
auto& table_stats = rep->table_stats; auto& table_stats = rep->table_stats;
// All pre-defined stats of type uint64_t // All pre-defined stats of type uint64_t
std::unordered_map<std::string, uint64_t*> predefined_uint64_stats = { std::unordered_map<std::string, uint64_t*> predefined_uint64_stats = {
{ TableStatsNames::kDataSize, &table_stats.data_size }, { BlockBasedTableStatsNames::kDataSize, &table_stats.data_size },
{ TableStatsNames::kIndexSize, &table_stats.index_size }, { BlockBasedTableStatsNames::kIndexSize, &table_stats.index_size },
{ TableStatsNames::kRawKeySize, &table_stats.raw_key_size }, { BlockBasedTableStatsNames::kRawKeySize, &table_stats.raw_key_size },
{ TableStatsNames::kRawValueSize, &table_stats.raw_value_size }, { BlockBasedTableStatsNames::kRawValueSize, &table_stats.raw_value_size },
{ TableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks }, { BlockBasedTableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks},
{ TableStatsNames::kNumEntries, &table_stats.num_entries }, { BlockBasedTableStatsNames::kNumEntries, &table_stats.num_entries },
}; };
std::string last_key; std::string last_key;
@ -313,7 +313,7 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
continue; continue;
} }
*(pos->second) = val; *(pos->second) = val;
} else if (key == TableStatsNames::kFilterPolicy) { } else if (key == BlockBasedTableStatsNames::kFilterPolicy) {
table_stats.filter_policy_name = raw_val.ToString(); table_stats.filter_policy_name = raw_val.ToString();
} else { } else {
// handle user-collected // handle user-collected
@ -497,10 +497,6 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
return may_match; return may_match;
} }
Iterator* Table::NewIterator(const ReadOptions& options) {
return nullptr;
}
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) { Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
if (options.prefix) { if (options.prefix) {
InternalKey internal_prefix(*options.prefix, 0, kTypeValue); InternalKey internal_prefix(*options.prefix, 0, kTypeValue);
@ -517,21 +513,23 @@ Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
options, rep_->soptions); options, rep_->soptions);
} }
Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k, Status BlockBasedTable::Get(
void* arg, const ReadOptions& readOptions,
bool (*saver)(void*, const Slice&, const Slice&, const Slice& key,
bool), void* handle_context,
void (*mark_key_may_exist)(void*)) { bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context)) {
Status s; Status s;
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator); Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
bool done = false; bool done = false;
for (iiter->Seek(k); iiter->Valid() && !done; iiter->Next()) { for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
Slice handle_value = iiter->value(); Slice handle_value = iiter->value();
FilterBlockReader* filter = rep_->filter; FilterBlockReader* filter = rep_->filter;
BlockHandle handle; BlockHandle handle;
if (filter != nullptr && if (filter != nullptr &&
handle.DecodeFrom(&handle_value).ok() && handle.DecodeFrom(&handle_value).ok() &&
!filter->KeyMayMatch(handle.offset(), k)) { !filter->KeyMayMatch(handle.offset(), key)) {
// Not found // Not found
// TODO: think about interaction with Merge. If a user key cannot // TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine. // cross one data block, we should be fine.
@ -540,19 +538,20 @@ Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k,
} else { } else {
bool didIO = false; bool didIO = false;
std::unique_ptr<Iterator> block_iter( std::unique_ptr<Iterator> block_iter(
BlockReader(this, options, iiter->value(), &didIO)); BlockReader(this, readOptions, iiter->value(), &didIO));
if (options.read_tier && block_iter->status().IsIncomplete()) { if (readOptions.read_tier && block_iter->status().IsIncomplete()) {
// couldn't get block from block_cache // couldn't get block from block_cache
// Update Saver.state to Found because we are only looking for whether // Update Saver.state to Found because we are only looking for whether
// we can guarantee the key is not there when "no_io" is set // we can guarantee the key is not there when "no_io" is set
(*mark_key_may_exist)(arg); (*mark_key_may_exist_handler)(handle_context);
break; break;
} }
// Call the *saver function on each entry/block until it returns false // Call the *saver function on each entry/block until it returns false
for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) { for (block_iter->Seek(key); block_iter->Valid(); block_iter->Next()) {
if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) { if (!(*result_handler)(handle_context, block_iter->key(),
block_iter->value(), didIO)) {
done = true; done = true;
break; break;
} }
@ -611,12 +610,17 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) {
const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
const std::string BlockBasedTable::kStatsBlock = "rocksdb.stats"; const std::string BlockBasedTable::kStatsBlock = "rocksdb.stats";
const std::string TableStatsNames::kDataSize = "rocksdb.data.size"; const std::string BlockBasedTableStatsNames::kDataSize = "rocksdb.data.size";
const std::string TableStatsNames::kIndexSize = "rocksdb.index.size"; const std::string BlockBasedTableStatsNames::kIndexSize = "rocksdb.index.size";
const std::string TableStatsNames::kRawKeySize = "rocksdb.raw.key.size"; const std::string BlockBasedTableStatsNames::kRawKeySize =
const std::string TableStatsNames::kRawValueSize = "rocksdb.raw.value.size"; "rocksdb.raw.key.size";
const std::string TableStatsNames::kNumDataBlocks = "rocksdb.num.data.blocks"; const std::string BlockBasedTableStatsNames::kRawValueSize =
const std::string TableStatsNames::kNumEntries = "rocksdb.num.entries"; "rocksdb.raw.value.size";
const std::string TableStatsNames::kFilterPolicy = "rocksdb.filter.policy"; const std::string BlockBasedTableStatsNames::kNumDataBlocks =
"rocksdb.num.data.blocks";
const std::string BlockBasedTableStatsNames::kNumEntries =
"rocksdb.num.entries";
const std::string BlockBasedTableStatsNames::kFilterPolicy =
"rocksdb.filter.policy";
} // namespace rocksdb } // namespace rocksdb

View File

@ -24,14 +24,14 @@ struct Options;
class RandomAccessFile; class RandomAccessFile;
struct ReadOptions; struct ReadOptions;
class TableCache; class TableCache;
class Table; class TableReader;
using std::unique_ptr; using std::unique_ptr;
// A Table is a sorted map from strings to strings. Tables are // A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from // immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization. // multiple threads without external synchronization.
class BlockBasedTable : public Table { class BlockBasedTable : public TableReader {
public: public:
static const std::string kFilterBlockPrefix; static const std::string kFilterBlockPrefix;
static const std::string kStatsBlock; static const std::string kStatsBlock;
@ -40,19 +40,17 @@ class BlockBasedTable : public Table {
// of "file", and read the metadata entries necessary to allow // of "file", and read the metadata entries necessary to allow
// retrieving data from the table. // retrieving data from the table.
// //
// If successful, returns ok and sets "*table" to the newly opened // If successful, returns ok and sets "*table_reader" to the newly opened
// table. The client should delete "*table" when no longer needed. // table. The client should delete "*table_reader" when no longer needed.
// If there was an error while initializing the table, sets "*table" // If there was an error while initializing the table, sets "*table_reader"
// to nullptr and returns a non-ok status. Does not take ownership of // to nullptr and returns a non-ok status.
// "*source", but the client must ensure that "source" remains live
// for the duration of the returned table's lifetime.
// //
// *file must remain live while this Table is in use. // *file must remain live while this Table is in use.
static Status Open(const Options& options, static Status Open(const Options& options,
const EnvOptions& soptions, const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFile>&& file,
uint64_t file_size, uint64_t file_size,
unique_ptr<Table>* table); unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override; bool PrefixMayMatch(const Slice& internal_prefix) override;
@ -62,10 +60,13 @@ class BlockBasedTable : public Table {
Iterator* NewIterator(const ReadOptions&) override; Iterator* NewIterator(const ReadOptions&) override;
Status Get( Status Get(
const ReadOptions&, const Slice& key, const ReadOptions& readOptions,
void* arg, const Slice& key,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool), void* handle_context,
void (*mark_key_may_exist)(void*) = nullptr) override; bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr)
override;
// Given a key, return an approximate byte offset in the file where // Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were // the data for that key begins (or would begin if the key were
@ -115,8 +116,18 @@ class BlockBasedTable : public Table {
} }
// No copying allowed // No copying allowed
explicit BlockBasedTable(const Table&) = delete; explicit BlockBasedTable(const TableReader&) = delete;
void operator=(const Table&) = delete; void operator=(const TableReader&) = delete;
};
struct BlockBasedTableStatsNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
}; };
} // namespace rocksdb } // namespace rocksdb

View File

@ -22,7 +22,7 @@
#include "table/block.h" #include "table/block.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/format.h" #include "table/format.h"
#include "table/block_based_table.h" #include "table/block_based_table_reader.h"
#include "table/block_based_table_builder.h" #include "table/block_based_table_builder.h"
#include "util/random.h" #include "util/random.h"
#include "util/testharness.h" #include "util/testharness.h"
@ -250,7 +250,7 @@ class BlockBasedTableConstructor: public Constructor {
virtual Status FinishImpl(const Options& options, const KVMap& data) { virtual Status FinishImpl(const Options& options, const KVMap& data) {
Reset(); Reset();
sink_.reset(new StringSink()); sink_.reset(new StringSink());
BlockBasedTableBuilder builder(options, sink_.get()); BlockBasedTableBuilder builder(options, sink_.get(), options.compression);
for (KVMap::const_iterator it = data.begin(); for (KVMap::const_iterator it = data.begin();
it != data.end(); it != data.end();
@ -267,36 +267,36 @@ class BlockBasedTableConstructor: public Constructor {
uniq_id_ = cur_uniq_id_++; uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_)); source_.reset(new StringSource(sink_->contents(), uniq_id_));
unique_ptr<TableFactory> table_factory; unique_ptr<TableFactory> table_factory;
return options.table_factory->OpenTable(options, soptions, return options.table_factory->GetTableReader(options, soptions,
std::move(source_), std::move(source_),
sink_->contents().size(), sink_->contents().size(),
&table_); &table_reader_);
} }
virtual Iterator* NewIterator() const { virtual Iterator* NewIterator() const {
return table_->NewIterator(ReadOptions()); return table_reader_->NewIterator(ReadOptions());
} }
uint64_t ApproximateOffsetOf(const Slice& key) const { uint64_t ApproximateOffsetOf(const Slice& key) const {
return table_->ApproximateOffsetOf(key); return table_reader_->ApproximateOffsetOf(key);
} }
virtual Status Reopen(const Options& options) { virtual Status Reopen(const Options& options) {
source_.reset(new StringSource(sink_->contents(), uniq_id_)); source_.reset(new StringSource(sink_->contents(), uniq_id_));
return options.table_factory->OpenTable(options, soptions, return options.table_factory->GetTableReader(options, soptions,
std::move(source_), std::move(source_),
sink_->contents().size(), sink_->contents().size(),
&table_); &table_reader_);
} }
virtual Table* table() { virtual TableReader* table_reader() {
return table_.get(); return table_reader_.get();
} }
private: private:
void Reset() { void Reset() {
uniq_id_ = 0; uniq_id_ = 0;
table_.reset(); table_reader_.reset();
sink_.reset(); sink_.reset();
source_.reset(); source_.reset();
} }
@ -304,7 +304,7 @@ class BlockBasedTableConstructor: public Constructor {
uint64_t uniq_id_; uint64_t uniq_id_;
unique_ptr<StringSink> sink_; unique_ptr<StringSink> sink_;
unique_ptr<StringSource> source_; unique_ptr<StringSource> source_;
unique_ptr<Table> table_; unique_ptr<TableReader> table_reader_;
BlockBasedTableConstructor(); BlockBasedTableConstructor();
@ -883,7 +883,7 @@ TEST(TableTest, BasicTableStats) {
c.Finish(options, &keys, &kvmap); c.Finish(options, &keys, &kvmap);
auto& stats = c.table()->GetTableStats(); auto& stats = c.table_reader()->GetTableStats();
ASSERT_EQ(kvmap.size(), stats.num_entries); ASSERT_EQ(kvmap.size(), stats.num_entries);
auto raw_key_size = kvmap.size() * 2ul; auto raw_key_size = kvmap.size() * 2ul;
@ -918,7 +918,7 @@ TEST(TableTest, FilterPolicyNameStats) {
options.filter_policy = filter_policy.get(); options.filter_policy = filter_policy.get();
c.Finish(options, &keys, &kvmap); c.Finish(options, &keys, &kvmap);
auto& stats = c.table()->GetTableStats(); auto& stats = c.table_reader()->GetTableStats();
ASSERT_EQ("rocksdb.BuiltinBloomFilter", stats.filter_policy_name); ASSERT_EQ("rocksdb.BuiltinBloomFilter", stats.filter_policy_name);
} }
@ -960,7 +960,7 @@ TEST(TableTest, IndexSizeStat) {
c.Finish(options, &ks, &kvmap); c.Finish(options, &ks, &kvmap);
auto index_size = auto index_size =
c.table()->GetTableStats().index_size; c.table_reader()->GetTableStats().index_size;
ASSERT_GT(index_size, last_index_size); ASSERT_GT(index_size, last_index_size);
last_index_size = index_size; last_index_size = index_size;
} }
@ -985,7 +985,7 @@ TEST(TableTest, NumBlockStat) {
c.Finish(options, &ks, &kvmap); c.Finish(options, &ks, &kvmap);
ASSERT_EQ( ASSERT_EQ(
kvmap.size(), kvmap.size(),
c.table()->GetTableStats().num_data_blocks c.table_reader()->GetTableStats().num_data_blocks
); );
} }
@ -1100,7 +1100,7 @@ TEST(TableTest, BlockCacheLeak) {
ASSERT_OK(c.Reopen(opt)); ASSERT_OK(c.Reopen(opt));
for (const std::string& key: keys) { for (const std::string& key: keys) {
ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key)); ASSERT_TRUE(c.table_reader()->TEST_KeyInCache(ReadOptions(), key));
} }
} }

View File

@ -63,7 +63,7 @@ Status SstFileReader::ReadSequential(bool print_kv,
bool has_to, bool has_to,
const std::string& to_key) const std::string& to_key)
{ {
unique_ptr<Table> table; unique_ptr<TableReader> table_reader;
InternalKeyComparator internal_comparator_(BytewiseComparator()); InternalKeyComparator internal_comparator_(BytewiseComparator());
Options table_options; Options table_options;
table_options.comparator = &internal_comparator_; table_options.comparator = &internal_comparator_;
@ -76,14 +76,15 @@ Status SstFileReader::ReadSequential(bool print_kv,
uint64_t file_size; uint64_t file_size;
table_options.env->GetFileSize(file_name_, &file_size); table_options.env->GetFileSize(file_name_, &file_size);
unique_ptr<TableFactory> table_factory; unique_ptr<TableFactory> table_factory;
s = table_options.table_factory->OpenTable(table_options, soptions_, s = table_options.table_factory->GetTableReader(table_options, soptions_,
std::move(file), file_size, std::move(file), file_size,
&table); &table_reader);
if(!s.ok()) { if(!s.ok()) {
return s; return s;
} }
Iterator* iter = table->NewIterator(ReadOptions(verify_checksum_, false)); Iterator* iter = table_reader->NewIterator(ReadOptions(verify_checksum_,
false));
uint64_t i = 0; uint64_t i = 0;
if (has_from) { if (has_from) {
InternalKey ikey(from_key, kMaxSequenceNumber, kValueTypeForSeek); InternalKey ikey(from_key, kMaxSequenceNumber, kValueTypeForSeek);