2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2012-11-06 04:18:49 +01:00
|
|
|
|
2013-10-05 07:32:05 +02:00
|
|
|
#pragma once
|
2014-11-26 20:37:59 +01:00
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
2012-11-06 04:18:49 +01:00
|
|
|
#include "db/db_impl.h"
|
2014-02-04 00:28:03 +01:00
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
2012-11-06 04:18:49 +01:00
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
namespace rocksdb {
|
2012-11-06 04:18:49 +01:00
|
|
|
|
|
|
|
class DBImplReadOnly : public DBImpl {
|
2014-02-04 00:28:03 +01:00
|
|
|
public:
|
2014-02-05 22:12:23 +01:00
|
|
|
DBImplReadOnly(const DBOptions& options, const std::string& dbname);
|
2014-02-04 00:28:03 +01:00
|
|
|
virtual ~DBImplReadOnly();
|
2012-11-06 04:18:49 +01:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
// Implementations of the DB interface
|
|
|
|
using DB::Get;
|
|
|
|
virtual Status Get(const ReadOptions& options,
|
2014-02-11 02:04:44 +01:00
|
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
2017-03-13 19:44:50 +01:00
|
|
|
PinnableSlice* value) override;
|
[RocksDB] [MergeOperator] The new Merge Interface! Uses merge sequences.
Summary:
Here are the major changes to the Merge Interface. It has been expanded
to handle cases where the MergeOperator is not associative. It does so by stacking
up merge operations while scanning through the key history (i.e.: during Get() or
Compaction), until a valid Put/Delete/end-of-history is encountered; it then
applies all of the merge operations in the correct sequence starting with the
base/sentinel value.
I have also introduced an "AssociativeMerge" function which allows the user to
take advantage of associative merge operations (such as in the case of counters).
The implementation will always attempt to merge the operations/operands themselves
together when they are encountered, and will resort to the "stacking" method if
and only if the "associative-merge" fails.
This implementation is conjectured to allow MergeOperator to handle the general
case, while still providing the user with the ability to take advantage of certain
efficiencies in their own merge-operator / data-structure.
NOTE: This is a preliminary diff. This must still go through a lot of review,
revision, and testing. Feedback welcome!
Test Plan:
-This is a preliminary diff. I have only just begun testing/debugging it.
-I will be testing this with the existing MergeOperator use-cases and unit-tests
(counters, string-append, and redis-lists)
-I will be "desk-checking" and walking through the code with the help gdb.
-I will find a way of stress-testing the new interface / implementation using
db_bench, db_test, merge_test, and/or db_stress.
-I will ensure that my tests cover all cases: Get-Memtable,
Get-Immutable-Memtable, Get-from-Disk, Iterator-Range-Scan, Flush-Memtable-to-L0,
Compaction-L0-L1, Compaction-Ln-L(n+1), Put/Delete found, Put/Delete not-found,
end-of-history, end-of-file, etc.
-A lot of feedback from the reviewers.
Reviewers: haobo, dhruba, zshao, emayanke
Reviewed By: haobo
CC: leveldb
Differential Revision: https://reviews.facebook.net/D11499
2013-08-06 05:14:32 +02:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
// TODO: Implement ReadOnly MultiGet?
|
[RocksDB] [MergeOperator] The new Merge Interface! Uses merge sequences.
Summary:
Here are the major changes to the Merge Interface. It has been expanded
to handle cases where the MergeOperator is not associative. It does so by stacking
up merge operations while scanning through the key history (i.e.: during Get() or
Compaction), until a valid Put/Delete/end-of-history is encountered; it then
applies all of the merge operations in the correct sequence starting with the
base/sentinel value.
I have also introduced an "AssociativeMerge" function which allows the user to
take advantage of associative merge operations (such as in the case of counters).
The implementation will always attempt to merge the operations/operands themselves
together when they are encountered, and will resort to the "stacking" method if
and only if the "associative-merge" fails.
This implementation is conjectured to allow MergeOperator to handle the general
case, while still providing the user with the ability to take advantage of certain
efficiencies in their own merge-operator / data-structure.
NOTE: This is a preliminary diff. This must still go through a lot of review,
revision, and testing. Feedback welcome!
Test Plan:
-This is a preliminary diff. I have only just begun testing/debugging it.
-I will be testing this with the existing MergeOperator use-cases and unit-tests
(counters, string-append, and redis-lists)
-I will be "desk-checking" and walking through the code with the help gdb.
-I will find a way of stress-testing the new interface / implementation using
db_bench, db_test, merge_test, and/or db_stress.
-I will ensure that my tests cover all cases: Get-Memtable,
Get-Immutable-Memtable, Get-from-Disk, Iterator-Range-Scan, Flush-Memtable-to-L0,
Compaction-L0-L1, Compaction-Ln-L(n+1), Put/Delete found, Put/Delete not-found,
end-of-history, end-of-file, etc.
-A lot of feedback from the reviewers.
Reviewers: haobo, dhruba, zshao, emayanke
Reviewed By: haobo
CC: leveldb
Differential Revision: https://reviews.facebook.net/D11499
2013-08-06 05:14:32 +02:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
using DBImpl::NewIterator;
|
|
|
|
virtual Iterator* NewIterator(const ReadOptions&,
|
2014-07-31 03:21:55 +02:00
|
|
|
ColumnFamilyHandle* column_family) override;
|
2012-11-06 04:18:49 +01:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
virtual Status NewIterators(
|
|
|
|
const ReadOptions& options,
|
2014-07-23 22:52:11 +02:00
|
|
|
const std::vector<ColumnFamilyHandle*>& column_families,
|
2014-07-31 03:21:55 +02:00
|
|
|
std::vector<Iterator*>* iterators) override;
|
[RocksDB] [Column Family] Interface proposal
Summary:
<This diff is for Column Family branch>
Sharing some of the work I've done so far. This diff compiles and passes the tests.
The biggest change is in options.h - I broke down Options into two parts - DBOptions and ColumnFamilyOptions. DBOptions is DB-specific (env, create_if_missing, block_cache, etc.) and ColumnFamilyOptions is column family-specific (all compaction options, compresion options, etc.). Note that this does not break backwards compatibility at all.
Further, I created DBWithColumnFamily which inherits DB interface and adds new functions with column family support. Clients can transparently switch to DBWithColumnFamily and it will not break their backwards compatibility.
There are few methods worth checking out: ListColumnFamilies(), MultiNewIterator(), MultiGet() and GetSnapshot(). [GetSnapshot() returns the snapshot across all column families for now - I think that's what we agreed on]
Finally, I made small changes to WriteBatch so we are able to atomically insert data across column families.
Please provide feedback.
Test Plan: make check works, the code is backward compatible
Reviewers: dhruba, haobo, sdong, kailiu, emayanke
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14445
2013-12-03 20:14:09 +01:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
using DBImpl::Put;
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status Put(const WriteOptions& /*options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const Slice& /*key*/, const Slice& /*value*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
|
|
|
using DBImpl::Merge;
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status Merge(const WriteOptions& /*options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const Slice& /*key*/, const Slice& /*value*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
|
|
|
using DBImpl::Delete;
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status Delete(const WriteOptions& /*options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const Slice& /*key*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
using DBImpl::SingleDelete;
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status SingleDelete(const WriteOptions& /*options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const Slice& /*key*/) override {
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status Write(const WriteOptions& /*options*/,
|
|
|
|
WriteBatch* /*updates*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
|
|
|
using DBImpl::CompactRange;
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status CompactRange(const CompactRangeOptions& /*options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const Slice* /*begin*/,
|
|
|
|
const Slice* /*end*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
2014-08-28 18:46:05 +02:00
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
using DBImpl::CompactFiles;
|
|
|
|
virtual Status CompactFiles(
|
2018-03-05 22:08:17 +01:00
|
|
|
const CompactionOptions& /*compact_options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const std::vector<std::string>& /*input_file_names*/,
|
2018-03-15 19:46:16 +01:00
|
|
|
const int /*output_level*/, const int /*output_path_id*/ = -1,
|
2018-12-13 23:12:02 +01:00
|
|
|
std::vector<std::string>* const /*output_file_names*/ = nullptr,
|
|
|
|
CompactionJobInfo* /*compaction_job_info*/ = nullptr) override {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
|
|
|
|
2014-07-31 03:21:55 +02:00
|
|
|
virtual Status DisableFileDeletions() override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status EnableFileDeletions(bool /*force*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
2018-12-08 02:03:49 +01:00
|
|
|
virtual Status GetLiveFiles(std::vector<std::string>& ret,
|
|
|
|
uint64_t* manifest_file_size,
|
|
|
|
bool /*flush_memtable*/) override {
|
|
|
|
return DBImpl::GetLiveFiles(ret, manifest_file_size,
|
|
|
|
false /* flush_memtable */);
|
2014-02-04 00:28:03 +01:00
|
|
|
}
|
2014-08-28 18:46:05 +02:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
using DBImpl::Flush;
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status Flush(const FlushOptions& /*options*/,
|
|
|
|
ColumnFamilyHandle* /*column_family*/) override {
|
2014-02-04 00:28:03 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
2012-11-06 04:18:49 +01:00
|
|
|
|
2015-09-26 06:19:29 +02:00
|
|
|
using DBImpl::SyncWAL;
|
|
|
|
virtual Status SyncWAL() override {
|
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
|
|
|
|
2017-01-20 00:43:51 +01:00
|
|
|
using DB::IngestExternalFile;
|
|
|
|
virtual Status IngestExternalFile(
|
2018-03-05 22:08:17 +01:00
|
|
|
ColumnFamilyHandle* /*column_family*/,
|
|
|
|
const std::vector<std::string>& /*external_files*/,
|
|
|
|
const IngestExternalFileOptions& /*ingestion_options*/) override {
|
2017-01-20 00:43:51 +01:00
|
|
|
return Status::NotSupported("Not supported operation in read only mode.");
|
|
|
|
}
|
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
private:
|
|
|
|
friend class DB;
|
2012-11-06 04:18:49 +01:00
|
|
|
|
2014-02-04 00:28:03 +01:00
|
|
|
// No copying allowed
|
|
|
|
DBImplReadOnly(const DBImplReadOnly&);
|
|
|
|
void operator=(const DBImplReadOnly&);
|
2012-11-06 04:18:49 +01:00
|
|
|
};
|
|
|
|
}
|
2014-11-26 20:37:59 +01:00
|
|
|
|
|
|
|
#endif // !ROCKSDB_LITE
|