ae36e509f8
Summary: The GetLiveFiles() api lists the set of sst files and the current MANIFEST file. But the database continues to append new data to the MANIFEST file even when the application is backing it up to the backup location. This means that the database-version that is stored in the MANIFEST FILE in the backup location does not correspond to the sst files returned by GetLiveFiles. This API adds a new parameter to GetLiveFiles. This new parmeter returns the current size of the MANIFEST file. Test Plan: Unit test attached. Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5631
411 lines
14 KiB
C++
411 lines
14 KiB
C++
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
//
|
|
// The representation of a DBImpl consists of a set of Versions. The
|
|
// newest version is called "current". Older versions may be kept
|
|
// around to provide a consistent view to live iterators.
|
|
//
|
|
// Each Version keeps track of a set of Table files per level. The
|
|
// entire set of versions is maintained in a VersionSet.
|
|
//
|
|
// Version,VersionSet are thread-compatible, but require external
|
|
// synchronization on all accesses.
|
|
|
|
#ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_
|
|
#define STORAGE_LEVELDB_DB_VERSION_SET_H_
|
|
|
|
#include <map>
|
|
#include <set>
|
|
#include <vector>
|
|
#include "db/dbformat.h"
|
|
#include "db/version_edit.h"
|
|
#include "port/port.h"
|
|
|
|
namespace leveldb {
|
|
|
|
namespace log { class Writer; }
|
|
|
|
class Compaction;
|
|
class Iterator;
|
|
class MemTable;
|
|
class TableBuilder;
|
|
class TableCache;
|
|
class Version;
|
|
class VersionSet;
|
|
class WritableFile;
|
|
|
|
// Return the smallest index i such that files[i]->largest >= key.
|
|
// Return files.size() if there is no such file.
|
|
// REQUIRES: "files" contains a sorted list of non-overlapping files.
|
|
extern int FindFile(const InternalKeyComparator& icmp,
|
|
const std::vector<FileMetaData*>& files,
|
|
const Slice& key);
|
|
|
|
// Returns true iff some file in "files" overlaps the user key range
|
|
// [*smallest,*largest].
|
|
// smallest==NULL represents a key smaller than all keys in the DB.
|
|
// largest==NULL represents a key largest than all keys in the DB.
|
|
// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
|
|
// in sorted order.
|
|
extern bool SomeFileOverlapsRange(
|
|
const InternalKeyComparator& icmp,
|
|
bool disjoint_sorted_files,
|
|
const std::vector<FileMetaData*>& files,
|
|
const Slice* smallest_user_key,
|
|
const Slice* largest_user_key);
|
|
|
|
class Version {
|
|
public:
|
|
// Append to *iters a sequence of iterators that will
|
|
// yield the contents of this Version when merged together.
|
|
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
|
void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
|
|
|
|
// Lookup the value for key. If found, store it in *val and
|
|
// return OK. Else return a non-OK status. Fills *stats.
|
|
// REQUIRES: lock is not held
|
|
struct GetStats {
|
|
FileMetaData* seek_file;
|
|
int seek_file_level;
|
|
};
|
|
Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
|
GetStats* stats);
|
|
|
|
// Adds "stats" into the current state. Returns true if a new
|
|
// compaction may need to be triggered, false otherwise.
|
|
// REQUIRES: lock is held
|
|
bool UpdateStats(const GetStats& stats);
|
|
|
|
// Reference count management (so Versions do not disappear out from
|
|
// under live iterators)
|
|
void Ref();
|
|
void Unref();
|
|
|
|
void GetOverlappingInputs(
|
|
int level,
|
|
const InternalKey* begin, // NULL means before all keys
|
|
const InternalKey* end, // NULL means after all keys
|
|
std::vector<FileMetaData*>* inputs);
|
|
|
|
// Returns true iff some file in the specified level overlaps
|
|
// some part of [*smallest_user_key,*largest_user_key].
|
|
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
|
|
// largest_user_key==NULL represents a key largest than all keys in the DB.
|
|
bool OverlapInLevel(int level,
|
|
const Slice* smallest_user_key,
|
|
const Slice* largest_user_key);
|
|
|
|
// Return the level at which we should place a new memtable compaction
|
|
// result that covers the range [smallest_user_key,largest_user_key].
|
|
int PickLevelForMemTableOutput(const Slice& smallest_user_key,
|
|
const Slice& largest_user_key);
|
|
|
|
int NumFiles(int level) const { return files_[level].size(); }
|
|
|
|
// Return a human readable string that describes this version's contents.
|
|
std::string DebugString() const;
|
|
|
|
private:
|
|
friend class Compaction;
|
|
friend class VersionSet;
|
|
|
|
class LevelFileNumIterator;
|
|
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
|
|
|
|
VersionSet* vset_; // VersionSet to which this Version belongs
|
|
Version* next_; // Next version in linked list
|
|
Version* prev_; // Previous version in linked list
|
|
int refs_; // Number of live refs to this version
|
|
|
|
// List of files per level
|
|
std::vector<FileMetaData*>* files_;
|
|
|
|
// Next file to compact based on seek stats.
|
|
FileMetaData* file_to_compact_;
|
|
int file_to_compact_level_;
|
|
|
|
// Level that should be compacted next and its compaction score.
|
|
// Score < 1 means compaction is not strictly needed. These fields
|
|
// are initialized by Finalize().
|
|
double compaction_score_;
|
|
int compaction_level_;
|
|
|
|
// The offset in the manifest file where this version is stored.
|
|
uint64_t offset_manifest_file_;
|
|
|
|
explicit Version(VersionSet* vset);
|
|
|
|
~Version();
|
|
|
|
// No copying allowed
|
|
Version(const Version&);
|
|
void operator=(const Version&);
|
|
};
|
|
|
|
class VersionSet {
|
|
public:
|
|
VersionSet(const std::string& dbname,
|
|
const Options* options,
|
|
TableCache* table_cache,
|
|
const InternalKeyComparator*);
|
|
~VersionSet();
|
|
|
|
// Apply *edit to the current version to form a new descriptor that
|
|
// is both saved to persistent state and installed as the new
|
|
// current version. Will release *mu while actually writing to the file.
|
|
// REQUIRES: *mu is held on entry.
|
|
// REQUIRES: no other thread concurrently calls LogAndApply()
|
|
Status LogAndApply(VersionEdit* edit, port::Mutex* mu);
|
|
|
|
// Recover the last saved descriptor from persistent storage.
|
|
Status Recover();
|
|
|
|
// Return the current version.
|
|
Version* current() const { return current_; }
|
|
|
|
// Return the current manifest file number
|
|
uint64_t ManifestFileNumber() const { return manifest_file_number_; }
|
|
|
|
// Allocate and return a new file number
|
|
uint64_t NewFileNumber() { return next_file_number_++; }
|
|
|
|
// Arrange to reuse "file_number" unless a newer file number has
|
|
// already been allocated.
|
|
// REQUIRES: "file_number" was returned by a call to NewFileNumber().
|
|
void ReuseFileNumber(uint64_t file_number) {
|
|
if (next_file_number_ == file_number + 1) {
|
|
next_file_number_ = file_number;
|
|
}
|
|
}
|
|
|
|
// Return the number of Table files at the specified level.
|
|
int NumLevelFiles(int level) const;
|
|
|
|
// Return the combined file size of all files at the specified level.
|
|
int64_t NumLevelBytes(int level) const;
|
|
|
|
// Return the last sequence number.
|
|
uint64_t LastSequence() const { return last_sequence_; }
|
|
|
|
// Set the last sequence number to s.
|
|
void SetLastSequence(uint64_t s) {
|
|
assert(s >= last_sequence_);
|
|
last_sequence_ = s;
|
|
}
|
|
|
|
// Mark the specified file number as used.
|
|
void MarkFileNumberUsed(uint64_t number);
|
|
|
|
// Return the current log file number.
|
|
uint64_t LogNumber() const { return log_number_; }
|
|
|
|
// Return the log file number for the log file that is currently
|
|
// being compacted, or zero if there is no such log file.
|
|
uint64_t PrevLogNumber() const { return prev_log_number_; }
|
|
|
|
int NumberLevels() const { return options_->num_levels; }
|
|
|
|
// Pick level and inputs for a new compaction.
|
|
// Returns NULL if there is no compaction to be done.
|
|
// Otherwise returns a pointer to a heap-allocated object that
|
|
// describes the compaction. Caller should delete the result.
|
|
Compaction* PickCompaction();
|
|
|
|
// Return a compaction object for compacting the range [begin,end] in
|
|
// the specified level. Returns NULL if there is nothing in that
|
|
// level that overlaps the specified range. Caller should delete
|
|
// the result.
|
|
Compaction* CompactRange(
|
|
int level,
|
|
const InternalKey* begin,
|
|
const InternalKey* end);
|
|
|
|
// Return the maximum overlapping data (in bytes) at next level for any
|
|
// file at a level >= 1.
|
|
int64_t MaxNextLevelOverlappingBytes();
|
|
|
|
// Create an iterator that reads over the compaction inputs for "*c".
|
|
// The caller should delete the iterator when no longer needed.
|
|
Iterator* MakeInputIterator(Compaction* c);
|
|
|
|
// Returns true iff some level needs a compaction.
|
|
bool NeedsCompaction() const {
|
|
Version* v = current_;
|
|
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL);
|
|
}
|
|
|
|
// Add all files listed in any live version to *live.
|
|
// May also mutate some internal state.
|
|
void AddLiveFiles(std::set<uint64_t>* live);
|
|
|
|
// Add all files listed in the current version to *live.
|
|
void AddLiveFilesCurrentVersion(std::set<uint64_t>* live);
|
|
|
|
// Return the approximate offset in the database of the data for
|
|
// "key" as of version "v".
|
|
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
|
|
|
// Return a human-readable short (single-line) summary of the number
|
|
// of files per level. Uses *scratch as backing store.
|
|
struct LevelSummaryStorage {
|
|
char buffer[100];
|
|
};
|
|
const char* LevelSummary(LevelSummaryStorage* scratch) const;
|
|
|
|
// printf contents (for debugging)
|
|
Status DumpManifest(Options& options, std::string& manifestFileName);
|
|
|
|
// Return a human-readable short (single-line) summary of the data size
|
|
// of files per level. Uses *scratch as backing store.
|
|
const char* LevelDataSizeSummary(LevelSummaryStorage* scratch) const;
|
|
|
|
// Return the size of the current manifest file
|
|
const uint64_t ManifestFileSize() { return current_->offset_manifest_file_; }
|
|
|
|
private:
|
|
class Builder;
|
|
|
|
friend class Compaction;
|
|
friend class Version;
|
|
|
|
void Finalize(Version* v);
|
|
|
|
void GetRange(const std::vector<FileMetaData*>& inputs,
|
|
InternalKey* smallest,
|
|
InternalKey* largest);
|
|
|
|
void GetRange2(const std::vector<FileMetaData*>& inputs1,
|
|
const std::vector<FileMetaData*>& inputs2,
|
|
InternalKey* smallest,
|
|
InternalKey* largest);
|
|
|
|
void SetupOtherInputs(Compaction* c);
|
|
|
|
// Save current contents to *log
|
|
Status WriteSnapshot(log::Writer* log);
|
|
|
|
void AppendVersion(Version* v);
|
|
|
|
double MaxBytesForLevel(int level);
|
|
|
|
uint64_t MaxFileSizeForLevel(int level);
|
|
|
|
int64_t ExpandedCompactionByteSizeLimit(int level);
|
|
|
|
int64_t MaxGrandParentOverlapBytes(int level);
|
|
|
|
Env* const env_;
|
|
const std::string dbname_;
|
|
const Options* const options_;
|
|
TableCache* const table_cache_;
|
|
const InternalKeyComparator icmp_;
|
|
uint64_t next_file_number_;
|
|
uint64_t manifest_file_number_;
|
|
uint64_t last_sequence_;
|
|
uint64_t log_number_;
|
|
uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
|
|
|
|
// Opened lazily
|
|
WritableFile* descriptor_file_;
|
|
log::Writer* descriptor_log_;
|
|
Version dummy_versions_; // Head of circular doubly-linked list of versions.
|
|
Version* current_; // == dummy_versions_.prev_
|
|
|
|
// Per-level key at which the next compaction at that level should start.
|
|
// Either an empty string, or a valid InternalKey.
|
|
std::string* compact_pointer_;
|
|
|
|
// Per-level target file size.
|
|
uint64_t* max_file_size_;
|
|
|
|
// Per-level max bytes
|
|
uint64_t* level_max_bytes_;
|
|
|
|
// No copying allowed
|
|
VersionSet(const VersionSet&);
|
|
void operator=(const VersionSet&);
|
|
};
|
|
|
|
// A Compaction encapsulates information about a compaction.
|
|
class Compaction {
|
|
public:
|
|
~Compaction();
|
|
|
|
// Return the level that is being compacted. Inputs from "level"
|
|
// and "level+1" will be merged to produce a set of "level+1" files.
|
|
int level() const { return level_; }
|
|
|
|
// Return the object that holds the edits to the descriptor done
|
|
// by this compaction.
|
|
VersionEdit* edit() { return edit_; }
|
|
|
|
// "which" must be either 0 or 1
|
|
int num_input_files(int which) const { return inputs_[which].size(); }
|
|
|
|
// Return the ith input file at "level()+which" ("which" must be 0 or 1).
|
|
FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
|
|
|
|
// Maximum size of files to build during this compaction.
|
|
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
|
|
|
// Is this a trivial compaction that can be implemented by just
|
|
// moving a single input file to the next level (no merging or splitting)
|
|
bool IsTrivialMove() const;
|
|
|
|
// Add all inputs to this compaction as delete operations to *edit.
|
|
void AddInputDeletions(VersionEdit* edit);
|
|
|
|
// Returns true if the information we have available guarantees that
|
|
// the compaction is producing data in "level+1" for which no data exists
|
|
// in levels greater than "level+1".
|
|
bool IsBaseLevelForKey(const Slice& user_key);
|
|
|
|
// Returns true iff we should stop building the current output
|
|
// before processing "internal_key".
|
|
bool ShouldStopBefore(const Slice& internal_key);
|
|
|
|
// Release the input version for the compaction, once the compaction
|
|
// is successful.
|
|
void ReleaseInputs();
|
|
|
|
void Summary(char* output, int len);
|
|
|
|
private:
|
|
friend class Version;
|
|
friend class VersionSet;
|
|
|
|
explicit Compaction(int level, uint64_t target_file_size,
|
|
uint64_t max_grandparent_overlap_bytes, int number_levels);
|
|
|
|
int level_;
|
|
uint64_t max_output_file_size_;
|
|
int64_t maxGrandParentOverlapBytes_;
|
|
Version* input_version_;
|
|
VersionEdit* edit_;
|
|
int number_levels_;
|
|
|
|
// Each compaction reads inputs from "level_" and "level_+1"
|
|
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
|
|
|
|
// State used to check for number of of overlapping grandparent files
|
|
// (parent == level_ + 1, grandparent == level_ + 2)
|
|
std::vector<FileMetaData*> grandparents_;
|
|
size_t grandparent_index_; // Index in grandparent_starts_
|
|
bool seen_key_; // Some output key has been seen
|
|
int64_t overlapped_bytes_; // Bytes of overlap between current output
|
|
// and grandparent files
|
|
|
|
// State for implementing IsBaseLevelForKey
|
|
|
|
// level_ptrs_ holds indices into input_version_->levels_: our state
|
|
// is that we are positioned at one of the file ranges for each
|
|
// higher level than the ones involved in this compaction (i.e. for
|
|
// all L >= level_ + 2).
|
|
size_t* level_ptrs_;
|
|
};
|
|
|
|
} // namespace leveldb
|
|
|
|
#endif // STORAGE_LEVELDB_DB_VERSION_SET_H_
|