Encryption at rest support

Summary:
This PR adds support for encrypting data stored by RocksDB when written to disk.

It adds an `EncryptedEnv` override of the `Env` class with matching overrides for sequential&random access files.
The encryption itself is done through a configurable `EncryptionProvider`. This class creates is asked to create `BlockAccessCipherStream` for a file. This is where the actual encryption/decryption is being done.
Currently there is a Counter mode implementation of `BlockAccessCipherStream` with a `ROT13` block cipher (NOTE the `ROT13` is for demo purposes only!!).

The Counter operation mode uses an initial counter & random initialization vector (IV).
Both are created randomly for each file and stored in a 4K (default size) block that is prefixed to that file. The `EncryptedEnv` implementation is such that clients of the `Env` class do not see this prefix (nor data, nor in filesize).
The largest part of the prefix block is also encrypted, and there is room left for implementation specific settings/values/keys in there.

To test the encryption, the `DBTestBase` class has been extended to consider a new environment variable called `ENCRYPTED_ENV`. If set, the test will setup a encrypted instance of the `Env` class to use for all tests.
Typically you would run it like this:

```
ENCRYPTED_ENV=1 make check_some
```

There is also an added test that checks that some data inserted into the database is or is not "visible" on disk. With `ENCRYPTED_ENV` active it must not find plain text strings, with `ENCRYPTED_ENV` unset, it must find the plain text strings.
Closes https://github.com/facebook/rocksdb/pull/2424

Differential Revision: D5322178

Pulled By: sdwilsh

fbshipit-source-id: 253b0a9c2c498cc98f580df7f2623cbf7678a27f
This commit is contained in:
Ewout Prangsma 2017-06-26 16:52:06 -07:00 committed by Facebook Github Bot
parent 7061912c23
commit 51778612c9
15 changed files with 1253 additions and 6 deletions

View File

@ -380,6 +380,7 @@ set(SOURCES
db/write_thread.cc db/write_thread.cc
env/env.cc env/env.cc
env/env_chroot.cc env/env_chroot.cc
env/env_encryption.cc
env/env_hdfs.cc env/env_hdfs.cc
env/mock_env.cc env/mock_env.cc
memtable/alloc_tracker.cc memtable/alloc_tracker.cc

View File

@ -323,6 +323,7 @@ EXPOBJECTS = $(EXP_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)
TESTS = \ TESTS = \
db_basic_test \ db_basic_test \
db_encryption_test \
db_test2 \ db_test2 \
external_sst_file_basic_test \ external_sst_file_basic_test \
auto_roll_logger_test \ auto_roll_logger_test \
@ -1030,6 +1031,9 @@ slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS)
db_basic_test: db/db_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) db_basic_test: db/db_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(AM_LINK)
db_encryption_test: db/db_encryption_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(AM_LINK)

View File

@ -105,6 +105,7 @@ cpp_library(
"db/write_thread.cc", "db/write_thread.cc",
"env/env.cc", "env/env.cc",
"env/env_chroot.cc", "env/env_chroot.cc",
"env/env_encryption.cc",
"env/env_hdfs.cc", "env/env_hdfs.cc",
"env/env_posix.cc", "env/env_posix.cc",
"env/io_posix.cc", "env/io_posix.cc",
@ -446,6 +447,7 @@ ROCKS_TESTS = [['merger_test', 'table/merger_test.cc', 'serial'],
['options_util_test', 'utilities/options/options_util_test.cc', 'serial'], ['options_util_test', 'utilities/options/options_util_test.cc', 'serial'],
['dynamic_bloom_test', 'util/dynamic_bloom_test.cc', 'serial'], ['dynamic_bloom_test', 'util/dynamic_bloom_test.cc', 'serial'],
['db_basic_test', 'db/db_basic_test.cc', 'serial'], ['db_basic_test', 'db/db_basic_test.cc', 'serial'],
['db_encryption_test', 'db/db_encryption_test.cc', 'serial'],
['db_merge_operator_test', 'db/db_merge_operator_test.cc', 'serial'], ['db_merge_operator_test', 'db/db_merge_operator_test.cc', 'serial'],
['manual_compaction_test', 'db/manual_compaction_test.cc', 'parallel'], ['manual_compaction_test', 'db/manual_compaction_test.cc', 'parallel'],
['delete_scheduler_test', 'util/delete_scheduler_test.cc', 'serial'], ['delete_scheduler_test', 'util/delete_scheduler_test.cc', 'serial'],

View File

@ -824,6 +824,9 @@ TEST_F(DBBasicTest, ChecksumTest) {
// sense to run // sense to run
#ifndef OS_WIN #ifndef OS_WIN
TEST_F(DBBasicTest, MmapAndBufferOptions) { TEST_F(DBBasicTest, MmapAndBufferOptions) {
if (!IsMemoryMappedAccessSupported()) {
return;
}
Options options = CurrentOptions(); Options options = CurrentOptions();
options.use_direct_reads = true; options.use_direct_reads = true;

96
db/db_encryption_test.cc Normal file
View File

@ -0,0 +1,96 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
//
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/perf_context.h"
#if !defined(ROCKSDB_LITE)
#include "util/sync_point.h"
#endif
#include <iostream>
#include <string>
namespace rocksdb {
class DBEncryptionTest : public DBTestBase {
public:
DBEncryptionTest() : DBTestBase("/db_encryption_test") {}
};
#ifndef ROCKSDB_LITE
TEST_F(DBEncryptionTest, CheckEncrypted) {
ASSERT_OK(Put("foo567", "v1.fetdq"));
ASSERT_OK(Put("bar123", "v2.dfgkjdfghsd"));
Close();
// Open all files and look for the values we've put in there.
// They should not be found if encrypted, otherwise
// they should be found.
std::vector<std::string> fileNames;
auto status = env_->GetChildren(dbname_, &fileNames);
ASSERT_OK(status);
auto defaultEnv = Env::Default();
int hits = 0;
for (auto it = fileNames.begin() ; it != fileNames.end(); ++it) {
if ((*it == "..") || (*it == ".")) {
continue;
}
auto filePath = dbname_ + "/" + *it;
unique_ptr<SequentialFile> seqFile;
auto envOptions = EnvOptions(CurrentOptions());
status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions);
ASSERT_OK(status);
uint64_t fileSize;
status = defaultEnv->GetFileSize(filePath, &fileSize);
ASSERT_OK(status);
std::string scratch;
scratch.reserve(fileSize);
Slice data;
status = seqFile->Read(fileSize, &data, (char*)scratch.data());
ASSERT_OK(status);
if (data.ToString().find("foo567") != std::string::npos) {
hits++;
//std::cout << "Hit in " << filePath << "\n";
}
if (data.ToString().find("v1.fetdq") != std::string::npos) {
hits++;
//std::cout << "Hit in " << filePath << "\n";
}
if (data.ToString().find("bar123") != std::string::npos) {
hits++;
//std::cout << "Hit in " << filePath << "\n";
}
if (data.ToString().find("v2.dfgkjdfghsd") != std::string::npos) {
hits++;
//std::cout << "Hit in " << filePath << "\n";
}
if (data.ToString().find("dfgk") != std::string::npos) {
hits++;
//std::cout << "Hit in " << filePath << "\n";
}
}
if (encrypted_env_) {
ASSERT_EQ(hits, 0);
} else {
ASSERT_GE(hits, 4);
}
}
#endif // ROCKSDB_LITE
} // namespace rocksdb
int main(int argc, char** argv) {
rocksdb::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -416,6 +416,7 @@ TEST_F(DBOptionsTest, DeleteObsoleteFilesPeriodChange) {
TEST_F(DBOptionsTest, MaxOpenFilesChange) { TEST_F(DBOptionsTest, MaxOpenFilesChange) {
SpecialEnv env(env_); SpecialEnv env(env_);
Options options; Options options;
options.env = CurrentOptions().env;
options.max_open_files = -1; options.max_open_files = -1;
Reopen(options); Reopen(options);

View File

@ -29,6 +29,9 @@ class DBRangeDelTest : public DBTestBase {
// ROCKSDB_LITE // ROCKSDB_LITE
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) { TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) {
if (!IsMemoryMappedAccessSupported()) {
return;
}
Options opts = CurrentOptions(); Options opts = CurrentOptions();
opts.table_factory.reset(new PlainTableFactory()); opts.table_factory.reset(new PlainTableFactory());
opts.prefix_extractor.reset(NewNoopTransform()); opts.prefix_extractor.reset(NewNoopTransform());

View File

@ -1030,6 +1030,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
const int kNumL0Files = 5; const int kNumL0Files = 5;
Options options; Options options;
options.env = CurrentOptions().env; // Make sure to use any custom env that the test is configured with.
options.allow_concurrent_memtable_write = false; options.allow_concurrent_memtable_write = false;
options.arena_block_size = kBlockSizeBytes; options.arena_block_size = kBlockSizeBytes;
options.compaction_style = kCompactionStyleUniversal; options.compaction_style = kCompactionStyleUniversal;

View File

@ -9,6 +9,7 @@
#include "db/db_test_util.h" #include "db/db_test_util.h"
#include "db/forward_iterator.h" #include "db/forward_iterator.h"
#include "rocksdb/env_encryption.h"
namespace rocksdb { namespace rocksdb {
@ -42,9 +43,12 @@ SpecialEnv::SpecialEnv(Env* base)
table_write_callback_ = nullptr; table_write_callback_ = nullptr;
} }
ROT13BlockCipher rot13Cipher_(16);
DBTestBase::DBTestBase(const std::string path) DBTestBase::DBTestBase(const std::string path)
: mem_env_(!getenv("MEM_ENV") ? nullptr : new MockEnv(Env::Default())), : mem_env_(!getenv("MEM_ENV") ? nullptr : new MockEnv(Env::Default())),
env_(new SpecialEnv(mem_env_ ? mem_env_ : Env::Default())), encrypted_env_(!getenv("ENCRYPTED_ENV") ? nullptr : NewEncryptedEnv(mem_env_ ? mem_env_ : Env::Default(), new CTREncryptionProvider(rot13Cipher_))),
env_(new SpecialEnv(encrypted_env_ ? encrypted_env_ : (mem_env_ ? mem_env_ : Env::Default()))),
option_config_(kDefault) { option_config_(kDefault) {
env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::LOW);
env_->SetBackgroundThreads(1, Env::HIGH); env_->SetBackgroundThreads(1, Env::HIGH);
@ -281,6 +285,7 @@ Options DBTestBase::GetOptions(
"NewWritableFile:O_DIRECT"); "NewWritableFile:O_DIRECT");
#endif #endif
bool can_allow_mmap = IsMemoryMappedAccessSupported();
switch (option_config) { switch (option_config) {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
case kHashSkipList: case kHashSkipList:
@ -291,14 +296,14 @@ Options DBTestBase::GetOptions(
case kPlainTableFirstBytePrefix: case kPlainTableFirstBytePrefix:
options.table_factory.reset(new PlainTableFactory()); options.table_factory.reset(new PlainTableFactory());
options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.allow_mmap_reads = true; options.allow_mmap_reads = can_allow_mmap;
options.max_sequential_skip_in_iterations = 999999; options.max_sequential_skip_in_iterations = 999999;
set_block_based_table_factory = false; set_block_based_table_factory = false;
break; break;
case kPlainTableCappedPrefix: case kPlainTableCappedPrefix:
options.table_factory.reset(new PlainTableFactory()); options.table_factory.reset(new PlainTableFactory());
options.prefix_extractor.reset(NewCappedPrefixTransform(8)); options.prefix_extractor.reset(NewCappedPrefixTransform(8));
options.allow_mmap_reads = true; options.allow_mmap_reads = can_allow_mmap;
options.max_sequential_skip_in_iterations = 999999; options.max_sequential_skip_in_iterations = 999999;
set_block_based_table_factory = false; set_block_based_table_factory = false;
break; break;
@ -312,7 +317,7 @@ Options DBTestBase::GetOptions(
case kPlainTableAllBytesPrefix: case kPlainTableAllBytesPrefix:
options.table_factory.reset(new PlainTableFactory()); options.table_factory.reset(new PlainTableFactory());
options.prefix_extractor.reset(NewNoopTransform()); options.prefix_extractor.reset(NewNoopTransform());
options.allow_mmap_reads = true; options.allow_mmap_reads = can_allow_mmap;
options.max_sequential_skip_in_iterations = 999999; options.max_sequential_skip_in_iterations = 999999;
set_block_based_table_factory = false; set_block_based_table_factory = false;
break; break;
@ -364,7 +369,7 @@ Options DBTestBase::GetOptions(
options.wal_dir = alternative_wal_dir_; options.wal_dir = alternative_wal_dir_;
// mmap reads should be orthogonal to WalDir setting, so we piggyback to // mmap reads should be orthogonal to WalDir setting, so we piggyback to
// this option config to test mmap reads as well // this option config to test mmap reads as well
options.allow_mmap_reads = true; options.allow_mmap_reads = can_allow_mmap;
break; break;
case kManifestFileSize: case kManifestFileSize:
options.max_manifest_file_size = 50; // 50 bytes options.max_manifest_file_size = 50; // 50 bytes
@ -384,7 +389,7 @@ Options DBTestBase::GetOptions(
options.num_levels = 8; options.num_levels = 8;
break; break;
case kCompressedBlockCache: case kCompressedBlockCache:
options.allow_mmap_writes = true; options.allow_mmap_writes = can_allow_mmap;
table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
break; break;
case kInfiniteMaxOpenFiles: case kInfiniteMaxOpenFiles:
@ -598,6 +603,10 @@ bool DBTestBase::IsDirectIOSupported() {
return s.ok(); return s.ok();
} }
bool DBTestBase::IsMemoryMappedAccessSupported() const {
return (!encrypted_env_);
}
Status DBTestBase::Flush(int cf) { Status DBTestBase::Flush(int cf) {
if (cf == 0) { if (cf == 0) {
return db_->Flush(FlushOptions()); return db_->Flush(FlushOptions());

View File

@ -671,6 +671,7 @@ class DBTestBase : public testing::Test {
std::string alternative_wal_dir_; std::string alternative_wal_dir_;
std::string alternative_db_log_dir_; std::string alternative_db_log_dir_;
MockEnv* mem_env_; MockEnv* mem_env_;
Env* encrypted_env_;
SpecialEnv* env_; SpecialEnv* env_;
DB* db_; DB* db_;
std::vector<ColumnFamilyHandle*> handles_; std::vector<ColumnFamilyHandle*> handles_;
@ -774,6 +775,8 @@ class DBTestBase : public testing::Test {
bool IsDirectIOSupported(); bool IsDirectIOSupported();
bool IsMemoryMappedAccessSupported() const;
Status Flush(int cf = 0); Status Flush(int cf = 0);
Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()); Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions());

View File

@ -114,6 +114,7 @@ TEST_F(EventListenerTest, OnSingleDBCompactionTest) {
const int kNumL0Files = 4; const int kNumL0Files = 4;
Options options; Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
options.compaction_style = kCompactionStyleLevel; options.compaction_style = kCompactionStyleLevel;
@ -233,6 +234,7 @@ class TestFlushListener : public EventListener {
TEST_F(EventListenerTest, OnSingleDBFlushTest) { TEST_F(EventListenerTest, OnSingleDBFlushTest) {
Options options; Options options;
options.env = CurrentOptions().env;
options.write_buffer_size = k110KB; options.write_buffer_size = k110KB;
#ifdef ROCKSDB_USING_THREAD_STATUS #ifdef ROCKSDB_USING_THREAD_STATUS
options.enable_thread_tracking = true; options.enable_thread_tracking = true;
@ -269,6 +271,7 @@ TEST_F(EventListenerTest, OnSingleDBFlushTest) {
TEST_F(EventListenerTest, MultiCF) { TEST_F(EventListenerTest, MultiCF) {
Options options; Options options;
options.env = CurrentOptions().env;
options.write_buffer_size = k110KB; options.write_buffer_size = k110KB;
#ifdef ROCKSDB_USING_THREAD_STATUS #ifdef ROCKSDB_USING_THREAD_STATUS
options.enable_thread_tracking = true; options.enable_thread_tracking = true;
@ -304,6 +307,7 @@ TEST_F(EventListenerTest, MultiCF) {
TEST_F(EventListenerTest, MultiDBMultiListeners) { TEST_F(EventListenerTest, MultiDBMultiListeners) {
Options options; Options options;
options.env = CurrentOptions().env;
#ifdef ROCKSDB_USING_THREAD_STATUS #ifdef ROCKSDB_USING_THREAD_STATUS
options.enable_thread_tracking = true; options.enable_thread_tracking = true;
#endif // ROCKSDB_USING_THREAD_STATUS #endif // ROCKSDB_USING_THREAD_STATUS
@ -386,6 +390,7 @@ TEST_F(EventListenerTest, MultiDBMultiListeners) {
TEST_F(EventListenerTest, DisableBGCompaction) { TEST_F(EventListenerTest, DisableBGCompaction) {
Options options; Options options;
options.env = CurrentOptions().env;
#ifdef ROCKSDB_USING_THREAD_STATUS #ifdef ROCKSDB_USING_THREAD_STATUS
options.enable_thread_tracking = true; options.enable_thread_tracking = true;
#endif // ROCKSDB_USING_THREAD_STATUS #endif // ROCKSDB_USING_THREAD_STATUS
@ -433,6 +438,7 @@ class TestCompactionReasonListener : public EventListener {
TEST_F(EventListenerTest, CompactionReasonLevel) { TEST_F(EventListenerTest, CompactionReasonLevel) {
Options options; Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
options.memtable_factory.reset( options.memtable_factory.reset(
new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
@ -498,6 +504,7 @@ TEST_F(EventListenerTest, CompactionReasonLevel) {
TEST_F(EventListenerTest, CompactionReasonUniversal) { TEST_F(EventListenerTest, CompactionReasonUniversal) {
Options options; Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
options.memtable_factory.reset( options.memtable_factory.reset(
new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
@ -559,6 +566,7 @@ TEST_F(EventListenerTest, CompactionReasonUniversal) {
TEST_F(EventListenerTest, CompactionReasonFIFO) { TEST_F(EventListenerTest, CompactionReasonFIFO) {
Options options; Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
options.memtable_factory.reset( options.memtable_factory.reset(
new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
@ -781,6 +789,7 @@ TEST_F(EventListenerTest, ColumnFamilyHandleDeletionStartedListenerTest) {
auto listener = auto listener =
std::make_shared<ColumnFamilyHandleDeletionStartedListener>(cfs); std::make_shared<ColumnFamilyHandleDeletionStartedListener>(cfs);
Options options; Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
options.listeners.push_back(listener); options.listeners.push_back(listener);
CreateAndReopenWithCF(cfs, options); CreateAndReopenWithCF(cfs, options);

911
env/env_encryption.cc vendored Normal file
View File

@ -0,0 +1,911 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
#ifndef ROCKSDB_LITE
#include <algorithm>
#include <cctype>
#include <iostream>
#include "rocksdb/env_encryption.h"
#include "util/aligned_buffer.h"
#include "util/coding.h"
#include "util/random.h"
#endif
namespace rocksdb {
#ifndef ROCKSDB_LITE
class EncryptedSequentialFile : public SequentialFile {
private:
std::unique_ptr<SequentialFile> file_;
std::unique_ptr<BlockAccessCipherStream> stream_;
uint64_t offset_;
size_t prefixLength_;
public:
// Default ctor. Given underlying sequential file is supposed to be at
// offset == prefixLength.
EncryptedSequentialFile(SequentialFile* f, BlockAccessCipherStream* s, size_t prefixLength)
: file_(f), stream_(s), offset_(prefixLength), prefixLength_(prefixLength) {
}
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be
// written by this routine. Sets "*result" to the data that was
// read (including if fewer than "n" bytes were successfully read).
// May set "*result" to point at data in "scratch[0..n-1]", so
// "scratch[0..n-1]" must be live when "*result" is used.
// If an error was encountered, returns a non-OK status.
//
// REQUIRES: External synchronization
virtual Status Read(size_t n, Slice* result, char* scratch) override {
assert(scratch);
Status status = file_->Read(n, result, scratch);
if (!status.ok()) {
return status;
}
status = stream_->Decrypt(offset_, (char*)result->data(), result->size());
offset_ += result->size(); // We've already ready data from disk, so update offset_ even if decryption fails.
return status;
}
// Skip "n" bytes from the file. This is guaranteed to be no
// slower that reading the same data, but may be faster.
//
// If end of file is reached, skipping will stop at the end of the
// file, and Skip will return OK.
//
// REQUIRES: External synchronization
virtual Status Skip(uint64_t n) override {
auto status = file_->Skip(n);
if (!status.ok()) {
return status;
}
offset_ += n;
return status;
}
// Indicates the upper layers if the current SequentialFile implementation
// uses direct IO.
virtual bool use_direct_io() const override {
return file_->use_direct_io();
}
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const override {
return file_->GetRequiredBufferAlignment();
}
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
virtual Status InvalidateCache(size_t offset, size_t length) override {
return file_->InvalidateCache(offset + prefixLength_, length);
}
// Positioned Read for direct I/O
// If Direct I/O enabled, offset, n, and scratch should be properly aligned
virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) override {
assert(scratch);
offset += prefixLength_; // Skip prefix
auto status = file_->PositionedRead(offset, n, result, scratch);
if (!status.ok()) {
return status;
}
offset_ = offset + result->size();
status = stream_->Decrypt(offset, (char*)result->data(), result->size());
return status;
}
};
// A file abstraction for randomly reading the contents of a file.
class EncryptedRandomAccessFile : public RandomAccessFile {
private:
std::unique_ptr<RandomAccessFile> file_;
std::unique_ptr<BlockAccessCipherStream> stream_;
size_t prefixLength_;
public:
EncryptedRandomAccessFile(RandomAccessFile* f, BlockAccessCipherStream* s, size_t prefixLength)
: file_(f), stream_(s), prefixLength_(prefixLength) { }
// Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were
// successfully read). May set "*result" to point at data in
// "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
// "*result" is used. If an error was encountered, returns a non-OK
// status.
//
// Safe for concurrent use by multiple threads.
// If Direct I/O enabled, offset, n, and scratch should be aligned properly.
virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override {
assert(scratch);
offset += prefixLength_;
auto status = file_->Read(offset, n, result, scratch);
if (!status.ok()) {
return status;
}
status = stream_->Decrypt(offset, (char*)result->data(), result->size());
return status;
}
// Readahead the file starting from offset by n bytes for caching.
virtual Status Prefetch(uint64_t offset, size_t n) override {
//return Status::OK();
return file_->Prefetch(offset + prefixLength_, n);
}
// Tries to get an unique ID for this file that will be the same each time
// the file is opened (and will stay the same while the file is open).
// Furthermore, it tries to make this ID at most "max_size" bytes. If such an
// ID can be created this function returns the length of the ID and places it
// in "id"; otherwise, this function returns 0, in which case "id"
// may not have been modified.
//
// This function guarantees, for IDs from a given environment, two unique ids
// cannot be made equal to eachother by adding arbitrary bytes to one of
// them. That is, no unique ID is the prefix of another.
//
// This function guarantees that the returned ID will not be interpretable as
// a single varint.
//
// Note: these IDs are only valid for the duration of the process.
virtual size_t GetUniqueId(char* id, size_t max_size) const override {
return file_->GetUniqueId(id, max_size);
};
virtual void Hint(AccessPattern pattern) override {
file_->Hint(pattern);
}
// Indicates the upper layers if the current RandomAccessFile implementation
// uses direct IO.
virtual bool use_direct_io() const override {
return file_->use_direct_io();
}
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const override {
return file_->GetRequiredBufferAlignment();
}
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
virtual Status InvalidateCache(size_t offset, size_t length) override {
return file_->InvalidateCache(offset + prefixLength_, length);
}
};
// A file abstraction for sequential writing. The implementation
// must provide buffering since callers may append small fragments
// at a time to the file.
class EncryptedWritableFile : public WritableFileWrapper {
private:
std::unique_ptr<WritableFile> file_;
std::unique_ptr<BlockAccessCipherStream> stream_;
size_t prefixLength_;
public:
// Default ctor. Prefix is assumed to be written already.
EncryptedWritableFile(WritableFile* f, BlockAccessCipherStream* s, size_t prefixLength)
: WritableFileWrapper(f), file_(f), stream_(s), prefixLength_(prefixLength) { }
Status Append(const Slice& data) override {
AlignedBuffer buf;
Status status;
Slice dataToAppend(data);
if (data.size() > 0) {
auto offset = file_->GetFileSize(); // size including prefix
// Encrypt in cloned buffer
buf.Alignment(GetRequiredBufferAlignment());
buf.AllocateNewBuffer(data.size());
memmove(buf.BufferStart(), data.data(), data.size());
status = stream_->Encrypt(offset, buf.BufferStart(), data.size());
if (!status.ok()) {
return status;
}
dataToAppend = Slice(buf.BufferStart(), data.size());
}
status = file_->Append(dataToAppend);
if (!status.ok()) {
return status;
}
return status;
}
Status PositionedAppend(const Slice& data, uint64_t offset) override {
AlignedBuffer buf;
Status status;
Slice dataToAppend(data);
offset += prefixLength_;
if (data.size() > 0) {
// Encrypt in cloned buffer
buf.Alignment(GetRequiredBufferAlignment());
buf.AllocateNewBuffer(data.size());
memmove(buf.BufferStart(), data.data(), data.size());
status = stream_->Encrypt(offset, buf.BufferStart(), data.size());
if (!status.ok()) {
return status;
}
dataToAppend = Slice(buf.BufferStart(), data.size());
}
status = file_->PositionedAppend(dataToAppend, offset);
if (!status.ok()) {
return status;
}
return status;
}
// Indicates the upper layers if the current WritableFile implementation
// uses direct IO.
virtual bool use_direct_io() const override { return file_->use_direct_io(); }
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const override { return file_->GetRequiredBufferAlignment(); }
/*
* Get the size of valid data in the file.
*/
virtual uint64_t GetFileSize() override {
return file_->GetFileSize() - prefixLength_;
}
// Truncate is necessary to trim the file to the correct size
// before closing. It is not always possible to keep track of the file
// size due to whole pages writes. The behavior is undefined if called
// with other writes to follow.
virtual Status Truncate(uint64_t size) override {
return file_->Truncate(size + prefixLength_);
}
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
// This call has no effect on dirty pages in the cache.
virtual Status InvalidateCache(size_t offset, size_t length) override {
return file_->InvalidateCache(offset + prefixLength_, length);
}
// Sync a file range with disk.
// offset is the starting byte of the file range to be synchronized.
// nbytes specifies the length of the range to be synchronized.
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override {
return file_->RangeSync(offset + prefixLength_, nbytes);
}
// PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation
// of space on devices where it can result in less file
// fragmentation and/or less waste from over-zealous filesystem
// pre-allocation.
virtual void PrepareWrite(size_t offset, size_t len) override {
file_->PrepareWrite(offset + prefixLength_, len);
}
// Pre-allocates space for a file.
virtual Status Allocate(uint64_t offset, uint64_t len) override {
return file_->Allocate(offset + prefixLength_, len);
}
};
// A file abstraction for random reading and writing.
class EncryptedRandomRWFile : public RandomRWFile {
private:
std::unique_ptr<RandomRWFile> file_;
std::unique_ptr<BlockAccessCipherStream> stream_;
size_t prefixLength_;
public:
EncryptedRandomRWFile(RandomRWFile* f, BlockAccessCipherStream* s, size_t prefixLength)
: file_(f), stream_(s), prefixLength_(prefixLength) {}
// Indicates if the class makes use of direct I/O
// If false you must pass aligned buffer to Write()
virtual bool use_direct_io() const override { return file_->use_direct_io(); }
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const override {
return file_->GetRequiredBufferAlignment();
}
// Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
// Pass aligned buffer when use_direct_io() returns true.
virtual Status Write(uint64_t offset, const Slice& data) override {
AlignedBuffer buf;
Status status;
Slice dataToWrite(data);
offset += prefixLength_;
if (data.size() > 0) {
// Encrypt in cloned buffer
buf.Alignment(GetRequiredBufferAlignment());
buf.AllocateNewBuffer(data.size());
memmove(buf.BufferStart(), data.data(), data.size());
status = stream_->Encrypt(offset, buf.BufferStart(), data.size());
if (!status.ok()) {
return status;
}
dataToWrite = Slice(buf.BufferStart(), data.size());
}
status = file_->Write(offset, dataToWrite);
return status;
}
// Read up to `n` bytes starting from offset `offset` and store them in
// result, provided `scratch` size should be at least `n`.
// Returns Status::OK() on success.
virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override {
assert(scratch);
offset += prefixLength_;
auto status = file_->Read(offset, n, result, scratch);
if (!status.ok()) {
return status;
}
status = stream_->Decrypt(offset, (char*)result->data(), result->size());
return status;
}
virtual Status Flush() override {
return file_->Flush();
}
virtual Status Sync() override {
return file_->Sync();
}
virtual Status Fsync() override {
return file_->Fsync();
}
virtual Status Close() override {
return file_->Close();
}
};
// EncryptedEnv implements an Env wrapper that adds encryption to files stored on disk.
class EncryptedEnv : public EnvWrapper {
public:
EncryptedEnv(Env* base_env, EncryptionProvider *provider)
: EnvWrapper(base_env) {
provider_ = provider;
}
// NewSequentialFile opens a file for sequential reading.
virtual Status NewSequentialFile(const std::string& fname,
std::unique_ptr<SequentialFile>* result,
const EnvOptions& options) override {
result->reset();
if (options.use_mmap_reads) {
return Status::InvalidArgument();
}
// Open file using underlying Env implementation
std::unique_ptr<SequentialFile> underlying;
auto status = EnvWrapper::NewSequentialFile(fname, &underlying, options);
if (!status.ok()) {
return status;
}
// Read prefix (if needed)
AlignedBuffer prefixBuf;
Slice prefixSlice;
size_t prefixLength = provider_->GetPrefixLength();
if (prefixLength > 0) {
// Read prefix
prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
prefixBuf.AllocateNewBuffer(prefixLength);
status = underlying->Read(prefixLength, &prefixSlice, prefixBuf.BufferStart());
if (!status.ok()) {
return status;
}
}
// Create cipher stream
std::unique_ptr<BlockAccessCipherStream> stream;
status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
if (!status.ok()) {
return status;
}
(*result) = std::unique_ptr<SequentialFile>(new EncryptedSequentialFile(underlying.release(), stream.release(), prefixLength));
return Status::OK();
}
// NewRandomAccessFile opens a file for random read access.
virtual Status NewRandomAccessFile(const std::string& fname,
unique_ptr<RandomAccessFile>* result,
const EnvOptions& options) override {
result->reset();
if (options.use_mmap_reads) {
return Status::InvalidArgument();
}
// Open file using underlying Env implementation
std::unique_ptr<RandomAccessFile> underlying;
auto status = EnvWrapper::NewRandomAccessFile(fname, &underlying, options);
if (!status.ok()) {
return status;
}
// Read prefix (if needed)
AlignedBuffer prefixBuf;
Slice prefixSlice;
size_t prefixLength = provider_->GetPrefixLength();
if (prefixLength > 0) {
// Read prefix
prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
prefixBuf.AllocateNewBuffer(prefixLength);
status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
if (!status.ok()) {
return status;
}
}
// Create cipher stream
std::unique_ptr<BlockAccessCipherStream> stream;
status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
if (!status.ok()) {
return status;
}
(*result) = std::unique_ptr<RandomAccessFile>(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength));
return Status::OK();
}
// NewWritableFile opens a file for sequential writing.
virtual Status NewWritableFile(const std::string& fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) override {
result->reset();
if (options.use_mmap_writes) {
return Status::InvalidArgument();
}
// Open file using underlying Env implementation
std::unique_ptr<WritableFile> underlying;
Status status = EnvWrapper::NewWritableFile(fname, &underlying, options);
if (!status.ok()) {
return status;
}
// Initialize & write prefix (if needed)
AlignedBuffer prefixBuf;
Slice prefixSlice;
size_t prefixLength = provider_->GetPrefixLength();
if (prefixLength > 0) {
// Initialize prefix
prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
prefixBuf.AllocateNewBuffer(prefixLength);
provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
// Write prefix
status = underlying->Append(prefixSlice);
if (!status.ok()) {
return status;
}
}
// Create cipher stream
std::unique_ptr<BlockAccessCipherStream> stream;
status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
if (!status.ok()) {
return status;
}
(*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
return Status::OK();
}
// Create an object that writes to a new file with the specified
// name. Deletes any existing file with the same name and creates a
// new file. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
virtual Status ReopenWritableFile(const std::string& fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) override {
result->reset();
if (options.use_mmap_writes) {
return Status::InvalidArgument();
}
// Open file using underlying Env implementation
std::unique_ptr<WritableFile> underlying;
Status status = EnvWrapper::ReopenWritableFile(fname, &underlying, options);
if (!status.ok()) {
return status;
}
// Initialize & write prefix (if needed)
AlignedBuffer prefixBuf;
Slice prefixSlice;
size_t prefixLength = provider_->GetPrefixLength();
if (prefixLength > 0) {
// Initialize prefix
prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
prefixBuf.AllocateNewBuffer(prefixLength);
provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
// Write prefix
status = underlying->Append(prefixSlice);
if (!status.ok()) {
return status;
}
}
// Create cipher stream
std::unique_ptr<BlockAccessCipherStream> stream;
status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
if (!status.ok()) {
return status;
}
(*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
return Status::OK();
}
// Reuse an existing file by renaming it and opening it as writable.
virtual Status ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) override {
result->reset();
if (options.use_mmap_writes) {
return Status::InvalidArgument();
}
// Open file using underlying Env implementation
std::unique_ptr<WritableFile> underlying;
Status status = EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options);
if (!status.ok()) {
return status;
}
// Initialize & write prefix (if needed)
AlignedBuffer prefixBuf;
Slice prefixSlice;
size_t prefixLength = provider_->GetPrefixLength();
if (prefixLength > 0) {
// Initialize prefix
prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
prefixBuf.AllocateNewBuffer(prefixLength);
provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
// Write prefix
status = underlying->Append(prefixSlice);
if (!status.ok()) {
return status;
}
}
// Create cipher stream
std::unique_ptr<BlockAccessCipherStream> stream;
status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
if (!status.ok()) {
return status;
}
(*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
return Status::OK();
}
// Open `fname` for random read and write, if file dont exist the file
// will be created. On success, stores a pointer to the new file in
// *result and returns OK. On failure returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewRandomRWFile(const std::string& fname,
unique_ptr<RandomRWFile>* result,
const EnvOptions& options) override {
result->reset();
if (options.use_mmap_reads || options.use_mmap_writes) {
return Status::InvalidArgument();
}
// Check file exists
bool isNewFile = !FileExists(fname).ok();
// Open file using underlying Env implementation
std::unique_ptr<RandomRWFile> underlying;
Status status = EnvWrapper::NewRandomRWFile(fname, &underlying, options);
if (!status.ok()) {
return status;
}
// Read or Initialize & write prefix (if needed)
AlignedBuffer prefixBuf;
Slice prefixSlice;
size_t prefixLength = provider_->GetPrefixLength();
if (prefixLength > 0) {
prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
prefixBuf.AllocateNewBuffer(prefixLength);
if (!isNewFile) {
// File already exists, read prefix
status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
if (!status.ok()) {
return status;
}
} else {
// File is new, initialize & write prefix
provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
// Write prefix
status = underlying->Write(0, prefixSlice);
if (!status.ok()) {
return status;
}
}
}
// Create cipher stream
std::unique_ptr<BlockAccessCipherStream> stream;
status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
if (!status.ok()) {
return status;
}
(*result) = std::unique_ptr<RandomRWFile>(new EncryptedRandomRWFile(underlying.release(), stream.release(), prefixLength));
return Status::OK();
}
// Store in *result the attributes of the children of the specified directory.
// In case the implementation lists the directory prior to iterating the files
// and files are concurrently deleted, the deleted files will be omitted from
// result.
// The name attributes are relative to "dir".
// Original contents of *results are dropped.
// Returns OK if "dir" exists and "*result" contains its children.
// NotFound if "dir" does not exist, the calling process does not have
// permission to access "dir", or if "dir" is invalid.
// IOError if an IO Error was encountered
virtual Status GetChildrenFileAttributes(const std::string& dir, std::vector<FileAttributes>* result) override {
auto status = EnvWrapper::GetChildrenFileAttributes(dir, result);
if (!status.ok()) {
return status;
}
size_t prefixLength = provider_->GetPrefixLength();
for (auto it = std::begin(*result); it!=std::end(*result); ++it) {
assert(it->size_bytes >= prefixLength);
it->size_bytes -= prefixLength;
}
return Status::OK();
}
// Store the size of fname in *file_size.
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) override {
auto status = EnvWrapper::GetFileSize(fname, file_size);
if (!status.ok()) {
return status;
}
size_t prefixLength = provider_->GetPrefixLength();
assert(*file_size >= prefixLength);
*file_size -= prefixLength;
return Status::OK();
}
private:
EncryptionProvider *provider_;
};
// Returns an Env that encrypts data when stored on disk and decrypts data when
// read from disk.
Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider) {
return new EncryptedEnv(base_env, provider);
}
// Encrypt one or more (partial) blocks of data at the file offset.
// Length of data is given in dataSize.
Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t dataSize) {
// Calculate block index
auto blockSize = BlockSize();
uint64_t blockIndex = fileOffset / blockSize;
size_t blockOffset = fileOffset % blockSize;
unique_ptr<char[]> blockBuffer;
std::string scratch;
AllocateScratch(scratch);
// Encrypt individual blocks.
while (1) {
char *block = data;
size_t n = std::min(dataSize, blockSize - blockOffset);
if (n != blockSize) {
// We're not encrypting a full block.
// Copy data to blockBuffer
if (!blockBuffer.get()) {
// Allocate buffer
blockBuffer = unique_ptr<char[]>(new char[blockSize]);
}
block = blockBuffer.get();
// Copy plain data to block buffer
memmove(block + blockOffset, data, n);
}
auto status = EncryptBlock(blockIndex, block, (char*)scratch.data());
if (!status.ok()) {
return status;
}
if (block != data) {
// Copy encrypted data back to `data`.
memmove(data, block + blockOffset, n);
}
dataSize -= n;
if (dataSize == 0) {
return Status::OK();
}
data += n;
blockOffset = 0;
blockIndex++;
}
}
// Decrypt one or more (partial) blocks of data at the file offset.
// Length of data is given in dataSize.
Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t dataSize) {
// Calculate block index
auto blockSize = BlockSize();
uint64_t blockIndex = fileOffset / blockSize;
size_t blockOffset = fileOffset % blockSize;
unique_ptr<char[]> blockBuffer;
std::string scratch;
AllocateScratch(scratch);
// Decrypt individual blocks.
while (1) {
char *block = data;
size_t n = std::min(dataSize, blockSize - blockOffset);
if (n != blockSize) {
// We're not decrypting a full block.
// Copy data to blockBuffer
if (!blockBuffer.get()) {
// Allocate buffer
blockBuffer = unique_ptr<char[]>(new char[blockSize]);
}
block = blockBuffer.get();
// Copy encrypted data to block buffer
memmove(block + blockOffset, data, n);
}
auto status = DecryptBlock(blockIndex, block, (char*)scratch.data());
if (!status.ok()) {
return status;
}
if (block != data) {
// Copy decrypted data back to `data`.
memmove(data, block + blockOffset, n);
}
dataSize -= n;
if (dataSize == 0) {
return Status::OK();
}
data += n;
blockOffset = 0;
blockIndex++;
}
}
// Encrypt a block of data.
// Length of data is equal to BlockSize().
Status ROT13BlockCipher::Encrypt(char *data) {
for (size_t i = 0; i < blockSize_; ++i) {
data[i] += 13;
}
return Status::OK();
}
// Decrypt a block of data.
// Length of data is equal to BlockSize().
Status ROT13BlockCipher::Decrypt(char *data) {
return Encrypt(data);
}
// Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
void CTRCipherStream::AllocateScratch(std::string& scratch) {
auto blockSize = cipher_.BlockSize();
scratch.reserve(blockSize);
}
// Encrypt a block of data at the given block index.
// Length of data is equal to BlockSize();
Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char *data, char* scratch) {
// Create nonce + counter
auto blockSize = cipher_.BlockSize();
memmove(scratch, iv_.data(), blockSize);
EncodeFixed64(scratch, blockIndex + initialCounter_);
// Encrypt nonce+counter
auto status = cipher_.Encrypt(scratch);
if (!status.ok()) {
return status;
}
// XOR data with ciphertext.
for (size_t i = 0; i < blockSize; i++) {
data[i] = data[i] ^ scratch[i];
}
return Status::OK();
}
// Decrypt a block of data at the given block index.
// Length of data is equal to BlockSize();
Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char *data, char* scratch) {
// For CTR decryption & encryption are the same
return EncryptBlock(blockIndex, data, scratch);
}
// GetPrefixLength returns the length of the prefix that is added to every file
// and used for storing encryption options.
// For optimal performance, the prefix length should be a multiple of
// the a page size.
size_t CTREncryptionProvider::GetPrefixLength() {
return defaultPrefixLength;
}
// decodeCTRParameters decodes the initial counter & IV from the given
// (plain text) prefix.
static void decodeCTRParameters(const char *prefix, size_t blockSize, uint64_t &initialCounter, Slice &iv) {
// First block contains 64-bit initial counter
initialCounter = DecodeFixed64(prefix);
// Second block contains IV
iv = Slice(prefix + blockSize, blockSize);
}
// CreateNewPrefix initialized an allocated block of prefix memory
// for a new file.
Status CTREncryptionProvider::CreateNewPrefix(const std::string& fname, char *prefix, size_t prefixLength) {
// Create & seed rnd.
Random rnd((uint32_t)Env::Default()->NowMicros());
// Fill entire prefix block with random values.
for (size_t i = 0; i < prefixLength; i++) {
prefix[i] = rnd.Uniform(256) & 0xFF;
}
// Take random data to extract initial counter & IV
auto blockSize = cipher_.BlockSize();
uint64_t initialCounter;
Slice prefixIV;
decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV);
// Now populate the rest of the prefix, starting from the third block.
PopulateSecretPrefixPart(prefix + (2 * blockSize), prefixLength - (2 * blockSize), blockSize);
// Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial counter & IV unencrypted)
CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter);
auto status = cipherStream.Encrypt(0, prefix + (2 * blockSize), prefixLength - (2 * blockSize));
if (!status.ok()) {
return status;
}
return Status::OK();
}
// PopulateSecretPrefixPart initializes the data into a new prefix block
// in plain text.
// Returns the amount of space (starting from the start of the prefix)
// that has been initialized.
size_t CTREncryptionProvider::PopulateSecretPrefixPart(char *prefix, size_t prefixLength, size_t blockSize) {
// Nothing to do here, put in custom data in override when needed.
return 0;
}
Status CTREncryptionProvider::CreateCipherStream(const std::string& fname, const EnvOptions& options, Slice &prefix, unique_ptr<BlockAccessCipherStream>* result) {
// Read plain text part of prefix.
auto blockSize = cipher_.BlockSize();
uint64_t initialCounter;
Slice iv;
decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv);
// Decrypt the encrypted part of the prefix, starting from block 2 (block 0, 1 with initial counter & IV are unencrypted)
CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter);
auto status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize), prefix.size() - (2 * blockSize));
if (!status.ok()) {
return status;
}
// Create cipher stream
return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv, prefix, result);
}
// CreateCipherStreamFromPrefix creates a block access cipher stream for a file given
// given name and options. The given prefix is already decrypted.
Status CTREncryptionProvider::CreateCipherStreamFromPrefix(const std::string& fname, const EnvOptions& options,
uint64_t initialCounter, const Slice& iv, const Slice& prefix, unique_ptr<BlockAccessCipherStream>* result) {
(*result) = unique_ptr<BlockAccessCipherStream>(new CTRCipherStream(cipher_, iv.data(), initialCounter));
return Status::OK();
}
#endif // ROCKSDB_LITE
} // namespace rocksdb

View File

@ -1055,6 +1055,10 @@ class EnvWrapper : public Env {
return target_->GetThreadID(); return target_->GetThreadID();
} }
std::string GenerateUniqueId() override {
return target_->GenerateUniqueId();
}
private: private:
Env* target_; Env* target_;
}; };

View File

@ -0,0 +1,198 @@
// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
#pragma once
#if !defined(ROCKSDB_LITE)
#include <string>
#include "env.h"
namespace rocksdb {
class EncryptionProvider;
// Returns an Env that encrypts data when stored on disk and decrypts data when
// read from disk.
Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider);
// BlockAccessCipherStream is the base class for any cipher stream that
// supports random access at block level (without requiring data from other blocks).
// E.g. CTR (Counter operation mode) supports this requirement.
class BlockAccessCipherStream {
public:
virtual ~BlockAccessCipherStream() {};
// BlockSize returns the size of each block supported by this cipher stream.
virtual size_t BlockSize() = 0;
// Encrypt one or more (partial) blocks of data at the file offset.
// Length of data is given in dataSize.
virtual Status Encrypt(uint64_t fileOffset, char *data, size_t dataSize);
// Decrypt one or more (partial) blocks of data at the file offset.
// Length of data is given in dataSize.
virtual Status Decrypt(uint64_t fileOffset, char *data, size_t dataSize);
protected:
// Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
virtual void AllocateScratch(std::string&) = 0;
// Encrypt a block of data at the given block index.
// Length of data is equal to BlockSize();
virtual Status EncryptBlock(uint64_t blockIndex, char *data, char* scratch) = 0;
// Decrypt a block of data at the given block index.
// Length of data is equal to BlockSize();
virtual Status DecryptBlock(uint64_t blockIndex, char *data, char* scratch) = 0;
};
// BlockCipher
class BlockCipher {
public:
virtual ~BlockCipher() {};
// BlockSize returns the size of each block supported by this cipher stream.
virtual size_t BlockSize() = 0;
// Encrypt a block of data.
// Length of data is equal to BlockSize().
virtual Status Encrypt(char *data) = 0;
// Decrypt a block of data.
// Length of data is equal to BlockSize().
virtual Status Decrypt(char *data) = 0;
};
// Implements a BlockCipher using ROT13.
//
// Note: This is a sample implementation of BlockCipher,
// it is NOT considered safe and should NOT be used in production.
class ROT13BlockCipher : public BlockCipher {
private:
size_t blockSize_;
public:
ROT13BlockCipher(size_t blockSize)
: blockSize_(blockSize) {}
virtual ~ROT13BlockCipher() {};
// BlockSize returns the size of each block supported by this cipher stream.
virtual size_t BlockSize() override { return blockSize_; }
// Encrypt a block of data.
// Length of data is equal to BlockSize().
virtual Status Encrypt(char *data) override;
// Decrypt a block of data.
// Length of data is equal to BlockSize().
virtual Status Decrypt(char *data) override;
};
// CTRCipherStream implements BlockAccessCipherStream using an
// Counter operations mode.
// See https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation
//
// Note: This is a possible implementation of BlockAccessCipherStream,
// it is considered suitable for use.
class CTRCipherStream final : public BlockAccessCipherStream {
private:
BlockCipher& cipher_;
std::string iv_;
uint64_t initialCounter_;
public:
CTRCipherStream(BlockCipher& c, const char *iv, uint64_t initialCounter)
: cipher_(c), iv_(iv, c.BlockSize()), initialCounter_(initialCounter) {};
virtual ~CTRCipherStream() {};
// BlockSize returns the size of each block supported by this cipher stream.
virtual size_t BlockSize() override { return cipher_.BlockSize(); }
protected:
// Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
virtual void AllocateScratch(std::string&) override;
// Encrypt a block of data at the given block index.
// Length of data is equal to BlockSize();
virtual Status EncryptBlock(uint64_t blockIndex, char *data, char *scratch) override;
// Decrypt a block of data at the given block index.
// Length of data is equal to BlockSize();
virtual Status DecryptBlock(uint64_t blockIndex, char *data, char *scratch) override;
};
// The encryption provider is used to create a cipher stream for a specific file.
// The returned cipher stream will be used for actual encryption/decryption
// actions.
class EncryptionProvider {
public:
virtual ~EncryptionProvider() {};
// GetPrefixLength returns the length of the prefix that is added to every file
// and used for storing encryption options.
// For optimal performance, the prefix length should be a multiple of
// the a page size.
virtual size_t GetPrefixLength() = 0;
// CreateNewPrefix initialized an allocated block of prefix memory
// for a new file.
virtual Status CreateNewPrefix(const std::string& fname, char *prefix, size_t prefixLength) = 0;
// CreateCipherStream creates a block access cipher stream for a file given
// given name and options.
virtual Status CreateCipherStream(const std::string& fname, const EnvOptions& options,
Slice& prefix, unique_ptr<BlockAccessCipherStream>* result) = 0;
};
// This encryption provider uses a CTR cipher stream, with a given block cipher
// and IV.
//
// Note: This is a possible implementation of EncryptionProvider,
// it is considered suitable for use, provided a safe BlockCipher is used.
class CTREncryptionProvider : public EncryptionProvider {
private:
BlockCipher& cipher_;
protected:
const static size_t defaultPrefixLength = 4096;
public:
CTREncryptionProvider(BlockCipher& c)
: cipher_(c) {};
virtual ~CTREncryptionProvider() {}
// GetPrefixLength returns the length of the prefix that is added to every file
// and used for storing encryption options.
// For optimal performance, the prefix length should be a multiple of
// the a page size.
virtual size_t GetPrefixLength() override;
// CreateNewPrefix initialized an allocated block of prefix memory
// for a new file.
virtual Status CreateNewPrefix(const std::string& fname, char *prefix, size_t prefixLength) override;
// CreateCipherStream creates a block access cipher stream for a file given
// given name and options.
virtual Status CreateCipherStream(const std::string& fname, const EnvOptions& options,
Slice& prefix, unique_ptr<BlockAccessCipherStream>* result) override;
protected:
// PopulateSecretPrefixPart initializes the data into a new prefix block
// that will be encrypted. This function will store the data in plain text.
// It will be encrypted later (before written to disk).
// Returns the amount of space (starting from the start of the prefix)
// that has been initialized.
virtual size_t PopulateSecretPrefixPart(char *prefix, size_t prefixLength, size_t blockSize);
// CreateCipherStreamFromPrefix creates a block access cipher stream for a file given
// given name and options. The given prefix is already decrypted.
virtual Status CreateCipherStreamFromPrefix(const std::string& fname, const EnvOptions& options,
uint64_t initialCounter, const Slice& iv, const Slice& prefix, unique_ptr<BlockAccessCipherStream>* result);
};
} // namespace rocksdb
#endif // !defined(ROCKSDB_LITE)

2
src.mk
View File

@ -57,6 +57,7 @@ LIB_SOURCES = \
db/write_thread.cc \ db/write_thread.cc \
env/env.cc \ env/env.cc \
env/env_chroot.cc \ env/env_chroot.cc \
env/env_encryption.cc \
env/env_hdfs.cc \ env/env_hdfs.cc \
env/env_posix.cc \ env/env_posix.cc \
env/io_posix.cc \ env/io_posix.cc \
@ -241,6 +242,7 @@ MAIN_SOURCES = \
db/db_compaction_filter_test.cc \ db/db_compaction_filter_test.cc \
db/db_compaction_test.cc \ db/db_compaction_test.cc \
db/db_dynamic_level_test.cc \ db/db_dynamic_level_test.cc \
db/db_encryption_test.cc \
db/db_flush_test.cc \ db/db_flush_test.cc \
db/db_inplace_update_test.cc \ db/db_inplace_update_test.cc \
db/db_io_failure_test.cc \ db/db_io_failure_test.cc \