432f3adf2c
Summary: Add a DB Property "rocksdb.current_version_number" for users to monitor version changes and stale iterators. Test Plan: Add a unit test. Reviewers: andrewkr, yhchiang, kradhakrishnan, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54927
857 lines
38 KiB
C++
857 lines
38 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_
|
|
#define STORAGE_ROCKSDB_INCLUDE_DB_H_
|
|
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include "rocksdb/immutable_options.h"
|
|
#include "rocksdb/iterator.h"
|
|
#include "rocksdb/listener.h"
|
|
#include "rocksdb/metadata.h"
|
|
#include "rocksdb/options.h"
|
|
#include "rocksdb/snapshot.h"
|
|
#include "rocksdb/thread_status.h"
|
|
#include "rocksdb/transaction_log.h"
|
|
#include "rocksdb/types.h"
|
|
#include "rocksdb/version.h"
|
|
|
|
#ifdef _WIN32
|
|
// Windows API macro interference
|
|
#undef DeleteFile
|
|
#endif
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
struct Options;
|
|
struct DBOptions;
|
|
struct ColumnFamilyOptions;
|
|
struct ReadOptions;
|
|
struct WriteOptions;
|
|
struct FlushOptions;
|
|
struct CompactionOptions;
|
|
struct CompactRangeOptions;
|
|
struct TableProperties;
|
|
struct ExternalSstFileInfo;
|
|
class WriteBatch;
|
|
class Env;
|
|
class EventListener;
|
|
|
|
using std::unique_ptr;
|
|
|
|
extern const std::string kDefaultColumnFamilyName;
|
|
struct ColumnFamilyDescriptor {
|
|
std::string name;
|
|
ColumnFamilyOptions options;
|
|
ColumnFamilyDescriptor()
|
|
: name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
|
|
ColumnFamilyDescriptor(const std::string& _name,
|
|
const ColumnFamilyOptions& _options)
|
|
: name(_name), options(_options) {}
|
|
};
|
|
|
|
class ColumnFamilyHandle {
|
|
public:
|
|
virtual ~ColumnFamilyHandle() {}
|
|
// Returns the name of the column family associated with the current handle.
|
|
virtual const std::string& GetName() const = 0;
|
|
// Returns the ID of the column family associated with the current handle.
|
|
virtual uint32_t GetID() const = 0;
|
|
// Fills "*desc" with the up-to-date descriptor of the column family
|
|
// associated with this handle. Since it fills "*desc" with the up-to-date
|
|
// information, this call might internally lock and release DB mutex to
|
|
// access the up-to-date CF options. In addition, all the pointer-typed
|
|
// options cannot be referenced any longer than the original options exist.
|
|
//
|
|
// Note that this function is not supported in RocksDBLite.
|
|
virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
|
|
};
|
|
|
|
// Major and minor version numbers, initialized from the __ROCKSDB_MAJOR__ and
// __ROCKSDB_MINOR__ macros.
static const int kMajorVersion = __ROCKSDB_MAJOR__;
|
|
static const int kMinorVersion = __ROCKSDB_MINOR__;
|
|
|
|
// A range of keys
|
|
struct Range {
|
|
Slice start; // Included in the range
|
|
Slice limit; // Not included in the range
|
|
|
|
Range() { }
|
|
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
|
|
};
|
|
|
|
// A collection of table properties objects, where
|
|
// key: is the table's file name.
|
|
// value: the table properties object of the given table.
|
|
using TablePropertiesCollection =
    std::unordered_map<std::string, std::shared_ptr<const TableProperties>>;
|
|
|
|
// A DB is a persistent ordered map from keys to values.
|
|
// A DB is safe for concurrent access from multiple threads without
|
|
// any external synchronization.
|
|
class DB {
|
|
public:
|
|
// Open the database with the specified "name".
|
|
// Stores a pointer to a heap-allocated database in *dbptr and returns
|
|
// OK on success.
|
|
// Stores nullptr in *dbptr and returns a non-OK status on error.
|
|
// Caller should delete *dbptr when it is no longer needed.
|
|
static Status Open(const Options& options,
|
|
const std::string& name,
|
|
DB** dbptr);
|
|
|
|
// Open the database for read only. All DB interfaces
|
|
// that modify data, like put/delete, will return error.
|
|
// If the db is opened in read only mode, then no compactions
|
|
// will happen.
|
|
//
|
|
// Not supported in ROCKSDB_LITE, in which case the function will
|
|
// return Status::NotSupported.
|
|
static Status OpenForReadOnly(const Options& options,
|
|
const std::string& name, DB** dbptr,
|
|
bool error_if_log_file_exist = false);
|
|
|
|
// Open the database for read only with column families. When opening DB with
|
|
// read only, you can specify only a subset of column families in the
|
|
// database that should be opened. However, you always need to specify default
|
|
// column family. The default column family name is 'default' and it's stored
|
|
// in rocksdb::kDefaultColumnFamilyName
|
|
//
|
|
// Not supported in ROCKSDB_LITE, in which case the function will
|
|
// return Status::NotSupported.
|
|
static Status OpenForReadOnly(
|
|
const DBOptions& db_options, const std::string& name,
|
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|
|
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
|
|
bool error_if_log_file_exist = false);
|
|
|
|
// Open DB with column families.
|
|
// db_options specify database specific options
|
|
// column_families is the vector of all column families in the database,
|
|
// containing column family name and options. You need to open ALL column
|
|
// families in the database. To get the list of column families, you can use
|
|
// ListColumnFamilies(). Also, you can open only a subset of column families
|
|
// for read-only access.
|
|
// The default column family name is 'default' and it's stored
|
|
// in rocksdb::kDefaultColumnFamilyName.
|
|
// If everything is OK, handles will on return be the same size
|
|
// as column_families --- handles[i] will be a handle that you
|
|
// will use to operate on column family column_families[i]
|
|
static Status Open(const DBOptions& db_options, const std::string& name,
|
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|
|
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
|
|
|
|
// ListColumnFamilies will open the DB specified by argument name
|
|
// and return the list of all column families in that DB
|
|
// through column_families argument. The ordering of
|
|
// column families in column_families is unspecified.
|
|
static Status ListColumnFamilies(const DBOptions& db_options,
|
|
const std::string& name,
|
|
std::vector<std::string>* column_families);
|
|
|
|
// Default constructor; the body is empty.
DB() { }
|
|
virtual ~DB();
|
|
|
|
// Create a column_family and return the handle of column family
|
|
// through the argument handle.
|
|
virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
|
|
const std::string& column_family_name,
|
|
ColumnFamilyHandle** handle);
|
|
|
|
// Drop a column family specified by column_family handle. This call
|
|
// only records a drop record in the manifest and prevents the column
|
|
// family from flushing and compacting.
|
|
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
|
|
|
|
// Set the database entry for "key" to "value".
|
|
// If "key" already exists, it will be overwritten.
|
|
// Returns OK on success, and a non-OK status on error.
|
|
// Note: consider setting options.sync = true.
|
|
virtual Status Put(const WriteOptions& options,
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
const Slice& value) = 0;
|
|
// Convenience overload: forwards to the column-family Put() using the handle
// returned by DefaultColumnFamily().
virtual Status Put(const WriteOptions& options, const Slice& key,
                   const Slice& value) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return Put(options, default_cf, key, value);
}
|
|
|
|
// Remove the database entry (if any) for "key". Returns OK on
|
|
// success, and a non-OK status on error. It is not an error if "key"
|
|
// did not exist in the database.
|
|
// Note: consider setting options.sync = true.
|
|
virtual Status Delete(const WriteOptions& options,
|
|
ColumnFamilyHandle* column_family,
|
|
const Slice& key) = 0;
|
|
// Convenience overload: forwards to the column-family Delete() using the
// handle returned by DefaultColumnFamily().
virtual Status Delete(const WriteOptions& options, const Slice& key) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return Delete(options, default_cf, key);
}
|
|
|
|
// Remove the database entry for "key". Requires that the key exists
|
|
// and was not overwritten. Returns OK on success, and a non-OK status
|
|
// on error. It is not an error if "key" did not exist in the database.
|
|
//
|
|
// If a key is overwritten (by calling Put() multiple times), then the result
|
|
// of calling SingleDelete() on this key is undefined. SingleDelete() only
|
|
// behaves correctly if there has been only one Put() for this key since the
|
|
// previous call to SingleDelete() for this key.
|
|
//
|
|
// This feature is currently an experimental performance optimization
|
|
// for a very specific workload. It is up to the caller to ensure that
|
|
// SingleDelete is only used for a key that is not deleted using Delete() or
|
|
// written using Merge(). Mixing SingleDelete operations with Deletes and
|
|
// Merges can result in undefined behavior.
|
|
//
|
|
// Note: consider setting options.sync = true.
|
|
virtual Status SingleDelete(const WriteOptions& options,
|
|
ColumnFamilyHandle* column_family,
|
|
const Slice& key) = 0;
|
|
// Convenience overload: forwards to the column-family SingleDelete() using the
// handle returned by DefaultColumnFamily().
virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return SingleDelete(options, default_cf, key);
}
|
|
|
|
// Merge the database entry for "key" with "value". Returns OK on success,
|
|
// and a non-OK status on error. The semantics of this operation is
|
|
// determined by the user provided merge_operator when opening DB.
|
|
// Note: consider setting options.sync = true.
|
|
virtual Status Merge(const WriteOptions& options,
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
const Slice& value) = 0;
|
|
// Convenience overload: forwards to the column-family Merge() using the handle
// returned by DefaultColumnFamily().
virtual Status Merge(const WriteOptions& options, const Slice& key,
                     const Slice& value) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return Merge(options, default_cf, key, value);
}
|
|
|
|
// Apply the specified updates to the database.
|
|
// If `updates` contains no update, WAL will still be synced if
|
|
// options.sync=true.
|
|
// Returns OK on success, non-OK on failure.
|
|
// Note: consider setting options.sync = true.
|
|
virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
|
|
|
|
// If the database contains an entry for "key" store the
|
|
// corresponding value in *value and return OK.
|
|
//
|
|
// If there is no entry for "key" leave *value unchanged and return
|
|
// a status for which Status::IsNotFound() returns true.
|
|
//
|
|
// May return some other Status on an error.
|
|
virtual Status Get(const ReadOptions& options,
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
std::string* value) = 0;
|
|
// Convenience overload: forwards to the column-family Get() using the handle
// returned by DefaultColumnFamily().
virtual Status Get(const ReadOptions& options, const Slice& key,
                   std::string* value) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return Get(options, default_cf, key, value);
}
|
|
|
|
// If keys[i] does not exist in the database, then the i'th returned
|
|
// status will be one for which Status::IsNotFound() is true, and
|
|
// (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
|
|
// the i'th returned status will have Status::ok() true, and (*values)[i]
|
|
// will store the value associated with keys[i].
|
|
//
|
|
// (*values) will always be resized to be the same size as (keys).
|
|
// Similarly, the number of returned statuses will be the number of keys.
|
|
// Note: keys will not be "de-duplicated". Duplicate keys will return
|
|
// duplicate values in order.
|
|
virtual std::vector<Status> MultiGet(
|
|
const ReadOptions& options,
|
|
const std::vector<ColumnFamilyHandle*>& column_family,
|
|
const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
|
|
virtual std::vector<Status> MultiGet(const ReadOptions& options,
|
|
const std::vector<Slice>& keys,
|
|
std::vector<std::string>* values) {
|
|
return MultiGet(options, std::vector<ColumnFamilyHandle*>(
|
|
keys.size(), DefaultColumnFamily()),
|
|
keys, values);
|
|
}
|
|
|
|
// If the key definitely does not exist in the database, then this method
|
|
// returns false, else true. If the caller wants to obtain value when the key
|
|
// is found in memory, a bool for 'value_found' must be passed. 'value_found'
|
|
// will be true on return if value has been set properly.
|
|
// This check is potentially lighter-weight than invoking DB::Get(). One way
|
|
// to make this lighter weight is to avoid doing any IOs.
|
|
// Default implementation here returns true and sets 'value_found' to false
|
|
virtual bool KeyMayExist(const ReadOptions& /*options*/,
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
const Slice& /*key*/, std::string* /*value*/,
|
|
bool* value_found = nullptr) {
|
|
if (value_found != nullptr) {
|
|
*value_found = false;
|
|
}
|
|
return true;
|
|
}
|
|
virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
|
|
std::string* value, bool* value_found = nullptr) {
|
|
return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
|
|
}
|
|
|
|
// Return a heap-allocated iterator over the contents of the database.
|
|
// The result of NewIterator() is initially invalid (caller must
|
|
// call one of the Seek methods on the iterator before using it).
|
|
//
|
|
// Caller should delete the iterator when it is no longer needed.
|
|
// The returned iterator should be deleted before this db is deleted.
|
|
virtual Iterator* NewIterator(const ReadOptions& options,
|
|
ColumnFamilyHandle* column_family) = 0;
|
|
// Convenience overload: forwards to the column-family NewIterator() using the
// handle returned by DefaultColumnFamily().
virtual Iterator* NewIterator(const ReadOptions& options) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return NewIterator(options, default_cf);
}
|
|
// Returns iterators from a consistent database state across multiple
|
|
// column families. Iterators are heap allocated and need to be deleted
|
|
// before the db is deleted
|
|
virtual Status NewIterators(
|
|
const ReadOptions& options,
|
|
const std::vector<ColumnFamilyHandle*>& column_families,
|
|
std::vector<Iterator*>* iterators) = 0;
|
|
|
|
// Return a handle to the current DB state. Iterators created with
|
|
// this handle will all observe a stable snapshot of the current DB
|
|
// state. The caller must call ReleaseSnapshot(result) when the
|
|
// snapshot is no longer needed.
|
|
//
|
|
// nullptr will be returned if the DB fails to take a snapshot or does
|
|
// not support snapshot.
|
|
virtual const Snapshot* GetSnapshot() = 0;
|
|
|
|
// Release a previously acquired snapshot. The caller must not
|
|
// use "snapshot" after this call.
|
|
virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
// Contains all valid property arguments for GetProperty().
|
|
//
|
|
// NOTE: Property names cannot end in numbers since those are interpreted as
|
|
// arguments, e.g., see kNumFilesAtLevelPrefix.
|
|
struct Properties {
  // "rocksdb.num-files-at-level<N>" - returns string containing the number
  //      of files at level <N>, where <N> is an ASCII representation of a
  //      level number (e.g., "0").
  static const std::string kNumFilesAtLevelPrefix;

  // "rocksdb.stats" - returns a multi-line string containing the data
  //      described by kCFStats followed by the data described by kDBStats.
  static const std::string kStats;

  // "rocksdb.sstables" - returns a multi-line string summarizing current
  //      SST files.
  static const std::string kSSTables;

  // "rocksdb.cfstats" - returns a multi-line string with general column
  //      family stats per-level over db's lifetime ("L<n>"), aggregated over
  //      db's lifetime ("Sum"), and aggregated over the interval since the
  //      last retrieval ("Int").
  static const std::string kCFStats;

  // "rocksdb.dbstats" - returns a multi-line string with general database
  //      stats, both cumulative (over the db's lifetime) and interval (since
  //      the last retrieval of kDBStats).
  static const std::string kDBStats;

  // "rocksdb.levelstats" - returns multi-line string containing the number
  //      of files per level and total size of each level (MB).
  static const std::string kLevelStats;

  // "rocksdb.num-immutable-mem-table" - returns number of immutable
  //      memtables that have not yet been flushed.
  static const std::string kNumImmutableMemTable;

  // "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
  //      memtables that have already been flushed.
  static const std::string kNumImmutableMemTableFlushed;

  // "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
  //      pending; otherwise, returns 0.
  static const std::string kMemTableFlushPending;

  // "rocksdb.num-running-flushes" - returns the number of currently running
  //      flushes.
  static const std::string kNumRunningFlushes;

  // "rocksdb.compaction-pending" - returns 1 if at least one compaction is
  //      pending; otherwise, returns 0.
  static const std::string kCompactionPending;

  // "rocksdb.num-running-compactions" - returns the number of currently
  //      running compactions.
  static const std::string kNumRunningCompactions;

  // "rocksdb.background-errors" - returns accumulated number of background
  //      errors.
  static const std::string kBackgroundErrors;

  // "rocksdb.cur-size-active-mem-table" - returns approximate size of active
  //      memtable (bytes).
  static const std::string kCurSizeActiveMemTable;

  // "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
  //      and unflushed immutable memtables (bytes).
  static const std::string kCurSizeAllMemTables;

  // "rocksdb.size-all-mem-tables" - returns approximate size of active,
  //      unflushed immutable, and pinned immutable memtables (bytes).
  static const std::string kSizeAllMemTables;

  // "rocksdb.num-entries-active-mem-table" - returns total number of entries
  //      in the active memtable.
  static const std::string kNumEntriesActiveMemTable;

  // "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
  //      in the unflushed immutable memtables.
  static const std::string kNumEntriesImmMemTables;

  // "rocksdb.num-deletes-active-mem-table" - returns total number of delete
  //      entries in the active memtable.
  static const std::string kNumDeletesActiveMemTable;

  // "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
  //      entries in the unflushed immutable memtables.
  static const std::string kNumDeletesImmMemTables;

  // "rocksdb.estimate-num-keys" - returns estimated number of total keys in
  //      the active and unflushed immutable memtables.
  static const std::string kEstimateNumKeys;

  // "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
  //      reading SST tables, excluding memory used in block cache (e.g.,
  //      filter and index blocks).
  static const std::string kEstimateTableReadersMem;

  // "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
  //      files is enabled; otherwise, returns a non-zero number.
  static const std::string kIsFileDeletionsEnabled;

  // "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
  //      database.
  static const std::string kNumSnapshots;

  // "rocksdb.oldest-snapshot-time" - returns number representing unix
  //      timestamp of oldest unreleased snapshot.
  static const std::string kOldestSnapshotTime;

  // "rocksdb.num-live-versions" - returns number of live versions. `Version`
  //      is an internal data structure. See version_set.h for details. More
  //      live versions often mean more SST files are held from being deleted,
  //      by iterators or unfinished compactions.
  static const std::string kNumLiveVersions;

  // "rocksdb.current-version-number" - returns number of current LSM version.
  // NOTE(review): the GetIntProperty() property list in this header spells
  //      this name "rocksdb.current_version_number" (underscores); confirm
  //      the exact string against the definition of kCurrentVersionNumber.
  static const std::string kCurrentVersionNumber;

  // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
  //      live data in bytes.
  static const std::string kEstimateLiveDataSize;

  // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
  //      files.
  //  WARNING: may slow down online queries if there are too many files.
  static const std::string kTotalSstFilesSize;

  // "rocksdb.base-level" - returns number of level to which L0 data will be
  //      compacted.
  static const std::string kBaseLevel;

  // "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
  //      number of bytes compaction needs to rewrite to get all levels down
  //      to under target size. Not valid for other compactions than level-
  //      based.
  static const std::string kEstimatePendingCompactionBytes;

  // "rocksdb.aggregated-table-properties" - returns a string representation
  //      of the aggregated table properties of the target column family.
  static const std::string kAggregatedTableProperties;

  // "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
  //      one but only returns the aggregated table properties of the
  //      specified level "N" at the target column family.
  static const std::string kAggregatedTablePropertiesAtLevel;
};
|
|
#endif /* ROCKSDB_LITE */
|
|
|
|
// DB implementations can export properties about their state via this method.
|
|
// If "property" is a valid property understood by this DB implementation (see
|
|
// Properties struct above for valid options), fills "*value" with its current
|
|
// value and returns true. Otherwise, returns false.
|
|
virtual bool GetProperty(ColumnFamilyHandle* column_family,
|
|
const Slice& property, std::string* value) = 0;
|
|
virtual bool GetProperty(const Slice& property, std::string* value) {
|
|
return GetProperty(DefaultColumnFamily(), property, value);
|
|
}
|
|
|
|
// Similar to GetProperty(), but only works for a subset of properties whose
|
|
// return value is an integer. Return the value by integer. Supported
|
|
// properties:
|
|
// "rocksdb.num-immutable-mem-table"
|
|
// "rocksdb.mem-table-flush-pending"
|
|
// "rocksdb.compaction-pending"
|
|
// "rocksdb.background-errors"
|
|
// "rocksdb.cur-size-active-mem-table"
|
|
// "rocksdb.cur-size-all-mem-tables"
|
|
// "rocksdb.size-all-mem-tables"
|
|
// "rocksdb.num-entries-active-mem-table"
|
|
// "rocksdb.num-entries-imm-mem-tables"
|
|
// "rocksdb.num-deletes-active-mem-table"
|
|
// "rocksdb.num-deletes-imm-mem-tables"
|
|
// "rocksdb.estimate-num-keys"
|
|
// "rocksdb.estimate-table-readers-mem"
|
|
// "rocksdb.is-file-deletions-enabled"
|
|
// "rocksdb.num-snapshots"
|
|
// "rocksdb.oldest-snapshot-time"
|
|
// "rocksdb.num-live-versions"
|
|
// "rocksdb.current_version_number"
|
|
// "rocksdb.estimate-live-data-size"
|
|
// "rocksdb.total-sst-files-size"
|
|
// "rocksdb.base-level"
|
|
// "rocksdb.estimate-pending-compaction-bytes"
|
|
// "rocksdb.num-running-compactions"
|
|
// "rocksdb.num-running-flushes"
|
|
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
|
|
const Slice& property, uint64_t* value) = 0;
|
|
virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
|
|
return GetIntProperty(DefaultColumnFamily(), property, value);
|
|
}
|
|
|
|
// Same as GetIntProperty(), but this one returns the aggregated int
|
|
// property from all column families.
|
|
virtual bool GetAggregatedIntProperty(const Slice& property,
|
|
uint64_t* value) = 0;
|
|
|
|
// For each i in [0,n-1], store in "sizes[i]", the approximate
|
|
// file system space used by keys in "[range[i].start .. range[i].limit)".
|
|
//
|
|
// Note that the returned sizes measure file system space usage, so
|
|
// if the user data compresses by a factor of ten, the returned
|
|
// sizes will be one-tenth the size of the corresponding user data size.
|
|
//
|
|
// If include_memtable is set to true, then the result will also
|
|
// include those recently written data in the mem-tables if
|
|
// the mem-table type supports it.
|
|
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
|
const Range* range, int n, uint64_t* sizes,
|
|
bool include_memtable = false) = 0;
|
|
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
|
|
bool include_memtable = false) {
|
|
GetApproximateSizes(DefaultColumnFamily(), range, n, sizes,
|
|
include_memtable);
|
|
}
|
|
|
|
// Compact the underlying storage for the key range [*begin,*end].
|
|
// The actual compaction interval might be superset of [*begin, *end].
|
|
// In particular, deleted and overwritten versions are discarded,
|
|
// and the data is rearranged to reduce the cost of operations
|
|
// needed to access the data. This operation should typically only
|
|
// be invoked by users who understand the underlying implementation.
|
|
//
|
|
// begin==nullptr is treated as a key before all keys in the database.
|
|
// end==nullptr is treated as a key after all keys in the database.
|
|
// Therefore the following call will compact the entire database:
|
|
// db->CompactRange(options, nullptr, nullptr);
|
|
// Note that after the entire database is compacted, all data are pushed
|
|
// down to the last level containing any data. If the total data size after
|
|
// compaction is reduced, that level might not be appropriate for hosting all
|
|
// the files. In this case, client could set options.change_level to true, to
|
|
// move the files back to the minimum level capable of holding the data set
|
|
// or a given level (specified by non-negative options.target_level).
|
|
virtual Status CompactRange(const CompactRangeOptions& options,
|
|
ColumnFamilyHandle* column_family,
|
|
const Slice* begin, const Slice* end) = 0;
|
|
// Convenience overload: forwards to the column-family CompactRange() using the
// handle returned by DefaultColumnFamily().
virtual Status CompactRange(const CompactRangeOptions& options,
                            const Slice* begin, const Slice* end) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return CompactRange(options, default_cf, begin, end);
}
|
|
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
__attribute__((deprecated))
|
|
#elif _WIN32
|
|
__declspec(deprecated)
|
|
#endif
|
|
virtual Status
|
|
CompactRange(ColumnFamilyHandle* column_family, const Slice* begin,
|
|
const Slice* end, bool change_level = false,
|
|
int target_level = -1, uint32_t target_path_id = 0) {
|
|
CompactRangeOptions options;
|
|
options.change_level = change_level;
|
|
options.target_level = target_level;
|
|
options.target_path_id = target_path_id;
|
|
return CompactRange(options, column_family, begin, end);
|
|
}
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
__attribute__((deprecated))
|
|
#elif _WIN32
|
|
__declspec(deprecated)
|
|
#endif
|
|
virtual Status
|
|
CompactRange(const Slice* begin, const Slice* end,
|
|
bool change_level = false, int target_level = -1,
|
|
uint32_t target_path_id = 0) {
|
|
CompactRangeOptions options;
|
|
options.change_level = change_level;
|
|
options.target_level = target_level;
|
|
options.target_path_id = target_path_id;
|
|
return CompactRange(options, DefaultColumnFamily(), begin, end);
|
|
}
|
|
|
|
// Default implementation: ignores both arguments and reports that the
// operation is not supported.
virtual Status SetOptions(
    ColumnFamilyHandle* /*column_family*/,
    const std::unordered_map<std::string, std::string>& /*new_options*/) {
  return Status::NotSupported("Not implemented");
}
|
|
// Convenience overload: forwards to the column-family SetOptions() using the
// handle returned by DefaultColumnFamily().
virtual Status SetOptions(
    const std::unordered_map<std::string, std::string>& new_options) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return SetOptions(default_cf, new_options);
}
|
|
|
|
// CompactFiles() inputs a list of files specified by file numbers and
|
|
// compacts them to the specified level. Note that the behavior is different
|
|
// from CompactRange() in that CompactFiles() performs the compaction job
|
|
// using the CURRENT thread.
|
|
//
|
|
// @see GetDataBaseMetaData
|
|
// @see GetColumnFamilyMetaData
|
|
virtual Status CompactFiles(
|
|
const CompactionOptions& compact_options,
|
|
ColumnFamilyHandle* column_family,
|
|
const std::vector<std::string>& input_file_names,
|
|
const int output_level, const int output_path_id = -1) = 0;
|
|
|
|
// Convenience overload: forwards to the column-family CompactFiles() using the
// handle returned by DefaultColumnFamily().
virtual Status CompactFiles(
    const CompactionOptions& compact_options,
    const std::vector<std::string>& input_file_names,
    const int output_level, const int output_path_id = -1) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return CompactFiles(compact_options, default_cf, input_file_names,
                      output_level, output_path_id);
}
|
|
|
|
// This function will wait until all currently running background processes
|
|
// finish. After it returns, no background process will be run until
|
|
// ContinueBackgroundWork is called
|
|
virtual Status PauseBackgroundWork() = 0;
|
|
virtual Status ContinueBackgroundWork() = 0;
|
|
|
|
// This function will enable automatic compactions for the given column
|
|
// families if they were previously disabled. The function will first set the
|
|
// disable_auto_compactions option for each column family to 'false', after
|
|
// which it will schedule a flush/compaction.
|
|
//
|
|
// NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
|
|
// does NOT schedule a flush/compaction afterwards, and only changes the
|
|
// parameter itself within the column family option.
|
|
//
|
|
virtual Status EnableAutoCompaction(
|
|
const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
|
|
|
|
// Number of levels used for this DB.
|
|
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
|
|
virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
|
|
|
|
// Maximum level to which a new compacted memtable is pushed if it
|
|
// does not create overlap.
|
|
virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
|
|
virtual int MaxMemCompactionLevel() {
|
|
return MaxMemCompactionLevel(DefaultColumnFamily());
|
|
}
|
|
|
|
// Number of files in level-0 that would stop writes.
|
|
virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
|
|
virtual int Level0StopWriteTrigger() {
|
|
return Level0StopWriteTrigger(DefaultColumnFamily());
|
|
}
|
|
|
|
// Get DB name -- the exact same name that was provided as an argument to
|
|
// DB::Open()
|
|
virtual const std::string& GetName() const = 0;
|
|
|
|
// Get Env object from the DB
|
|
virtual Env* GetEnv() const = 0;
|
|
|
|
// Get DB Options that we use. During the process of opening the
|
|
// column family, the options provided when calling DB::Open() or
|
|
// DB::CreateColumnFamily() will have been "sanitized" and transformed
|
|
// in an implementation-defined manner.
|
|
virtual const Options& GetOptions(ColumnFamilyHandle* column_family)
|
|
const = 0;
|
|
// Convenience overload: GetOptions() for the column family returned by
// DefaultColumnFamily().
virtual const Options& GetOptions() const {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return GetOptions(default_cf);
}
|
|
|
|
virtual const DBOptions& GetDBOptions() const = 0;
|
|
|
|
// Flush all mem-table data.
|
|
virtual Status Flush(const FlushOptions& options,
|
|
ColumnFamilyHandle* column_family) = 0;
|
|
// Convenience overload: forwards to the column-family Flush() using the handle
// returned by DefaultColumnFamily().
virtual Status Flush(const FlushOptions& options) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return Flush(options, default_cf);
}
|
|
|
|
// Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
|
|
// same as Write() with sync=true: in the latter case the changes won't be
|
|
// visible until the sync is done.
|
|
// Currently only works if allow_mmap_writes = false in Options.
|
|
virtual Status SyncWAL() = 0;
|
|
|
|
// The sequence number of the most recent transaction.
|
|
virtual SequenceNumber GetLatestSequenceNumber() const = 0;
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
// Prevent file deletions. Compactions will continue to occur,
|
|
// but no obsolete files will be deleted. Calling this multiple
|
|
// times have the same effect as calling it once.
|
|
virtual Status DisableFileDeletions() = 0;
|
|
|
|
// Allow compactions to delete obsolete files.
|
|
// If force == true, the call to EnableFileDeletions() will guarantee that
|
|
// file deletions are enabled after the call, even if DisableFileDeletions()
|
|
// was called multiple times before.
|
|
// If force == false, EnableFileDeletions will only enable file deletion
|
|
// after it's been called at least as many times as DisableFileDeletions(),
|
|
// enabling the two methods to be called by two threads concurrently without
|
|
// synchronization -- i.e., file deletions will be enabled only after both
|
|
// threads call EnableFileDeletions()
|
|
virtual Status EnableFileDeletions(bool force = true) = 0;
|
|
|
|
// GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
|
|
|
|
// Retrieve the list of all files in the database. The files are
|
|
// relative to the dbname and are not absolute paths. The valid size of the
|
|
// manifest file is returned in manifest_file_size. The manifest file is an
|
|
// ever growing file, but only the portion specified by manifest_file_size is
|
|
// valid for this snapshot.
|
|
// Setting flush_memtable to true does Flush before recording the live files.
|
|
// Setting flush_memtable to false is useful when we don't want to wait for
|
|
// flush which may have to wait for compaction to complete taking an
|
|
// indeterminate time.
|
|
//
|
|
// In case you have multiple column families, even if flush_memtable is true,
|
|
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
|
|
// for new data that arrived to already-flushed column families while other
|
|
// column families were flushing
|
|
virtual Status GetLiveFiles(std::vector<std::string>&,
|
|
uint64_t* manifest_file_size,
|
|
bool flush_memtable = true) = 0;
|
|
|
|
// Retrieve the sorted list of all wal files with earliest file first
|
|
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
|
|
|
|
// Sets iter to an iterator that is positioned at a write-batch containing
|
|
// seq_number. If the sequence number is non existent, it returns an iterator
|
|
// at the first available seq_no after the requested seq_no
|
|
// Returns Status::OK if iterator is valid
|
|
// Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
|
|
// use this api, else the WAL files will get
|
|
// cleared aggressively and the iterator might keep getting invalid before
|
|
// an update is read.
|
|
virtual Status GetUpdatesSince(
|
|
SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
|
|
const TransactionLogIterator::ReadOptions&
|
|
read_options = TransactionLogIterator::ReadOptions()) = 0;
|
|
|
|
// Windows API macro interference
|
|
#undef DeleteFile
|
|
// Delete the file name from the db directory and update the internal state to
|
|
// reflect that. Supports deletion of sst and log files only. 'name' must be
|
|
// path relative to the db directory. eg. 000001.sst, /archive/000003.log
|
|
virtual Status DeleteFile(std::string name) = 0;
|
|
|
|
// Returns a list of all table files with their level, start key
|
|
// and end key
|
|
virtual void GetLiveFilesMetaData(
|
|
std::vector<LiveFileMetaData>* /*metadata*/) {}
|
|
|
|
// Obtains the meta data of the specified column family of the DB.
|
|
// Status::NotFound() will be returned if the current DB does not have
|
|
// any column family match the specified name.
|
|
//
|
|
// If cf_name is not specified, then the metadata of the default
|
|
// column family will be returned.
|
|
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
|
|
ColumnFamilyMetaData* /*metadata*/) {}
|
|
|
|
// Get the metadata of the default column family.
|
|
void GetColumnFamilyMetaData(
|
|
ColumnFamilyMetaData* metadata) {
|
|
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
|
|
}
|
|
|
|
// Load table file located at "file_path" into "column_family", a pointer to
|
|
// ExternalSstFileInfo can be used instead of "file_path" to do a blind add
|
|
// that wont need to read the file, move_file can be set to true to
|
|
// move the file instead of copying it.
|
|
//
|
|
// Current Requirements:
|
|
// (1) Key range in loaded table file don't overlap with
|
|
// existing keys or tombstones in DB.
|
|
// (2) No other writes happen during AddFile call, otherwise
|
|
// DB may get corrupted.
|
|
// (3) No snapshots are held.
|
|
virtual Status AddFile(ColumnFamilyHandle* column_family,
|
|
const std::string& file_path,
|
|
bool move_file = false) = 0;
|
|
// Overload of AddFile(ColumnFamilyHandle*, const std::string&, bool) that
// targets the column family returned by DefaultColumnFamily().
virtual Status AddFile(const std::string& file_path, bool move_file = false) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return AddFile(default_cf, file_path, move_file);
}
|
|
|
|
// Load table file with information "file_info" into "column_family"
|
|
virtual Status AddFile(ColumnFamilyHandle* column_family,
|
|
const ExternalSstFileInfo* file_info,
|
|
bool move_file = false) = 0;
|
|
// Overload of AddFile(ColumnFamilyHandle*, const ExternalSstFileInfo*, bool)
// that targets the column family returned by DefaultColumnFamily().
virtual Status AddFile(
    const ExternalSstFileInfo* file_info, bool move_file = false) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return AddFile(default_cf, file_info, move_file);
}
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
// Sets the globally unique ID created at database creation time by invoking
|
|
// Env::GenerateUniqueId(), in identity. Returns Status::OK if identity could
|
|
// be set properly
|
|
virtual Status GetDbIdentity(std::string& identity) const = 0;
|
|
|
|
// Returns default column family handle
|
|
virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
// NOTE(review): undocumented in this header. Judging by the name, this fills
// *props with the properties of every table of "column_family" -- confirm
// against the implementation.
virtual Status GetPropertiesOfAllTables(
    ColumnFamilyHandle* column_family, TablePropertiesCollection* props) = 0;
|
|
// Overload of GetPropertiesOfAllTables(ColumnFamilyHandle*,
// TablePropertiesCollection*) that targets the column family returned by
// DefaultColumnFamily().
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
  ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
  return GetPropertiesOfAllTables(default_cf, props);
}
|
|
// NOTE(review): undocumented in this header. Presumably "range" points to an
// array of "n" Range objects and *props receives the properties of the
// tables of "column_family" that fall in those ranges -- confirm against the
// implementation.
virtual Status GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family,
                                            const Range* range, std::size_t n,
                                            TablePropertiesCollection* props) = 0;
|
|
#endif // ROCKSDB_LITE
|
|
|
|
// Needed for StackableDB
|
|
virtual DB* GetRootDB() { return this; }
|
|
|
|
private:
|
|
// No copying allowed
|
|
DB(const DB&);
|
|
void operator=(const DB&);
|
|
};
|
|
|
|
// Destroy the contents of the specified database.
|
|
// Be very careful using this method.
|
|
Status DestroyDB(const std::string& name, const Options& options);
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
// If a DB cannot be opened, you may attempt to call this method to
|
|
// resurrect as much of the contents of the database as possible.
|
|
// Some data may be lost, so be careful when calling this function
|
|
// on a database that contains important information.
|
|
Status RepairDB(const std::string& dbname, const Options& options);
|
|
#endif
|
|
|
|
} // namespace rocksdb
|
|
|
|
#endif // STORAGE_ROCKSDB_INCLUDE_DB_H_
|