Column family support for DB::OpenForReadOnly()

Summary: When opening a DB in read-only mode, the client can choose to specify only a subset of column families (the "default" column family can't be omitted, though).

Test Plan: added a unit test in column_family_test

Reviewers: haobo, sdong, ljin, dhruba

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D17565
This commit is contained in:
Igor Canadi 2014-04-09 09:56:17 -07:00
parent 0f5cbcd798
commit b947fdc89d
9 changed files with 131 additions and 29 deletions

View File

@ -60,6 +60,25 @@ class ColumnFamilyTest {
return DB::Open(db_options_, dbname_, column_families, &handles_, &db_);
}
// Opens the database read-only with the column families named in `cf`.
// If `options` is empty, every column family uses column_family_options_;
// otherwise options[i] is paired with cf[i]. names_ is reset to mirror `cf`,
// and handles_ / db_ are passed to DB::OpenForReadOnly() as outputs.
// Returns the Status reported by DB::OpenForReadOnly().
Status OpenReadOnly(std::vector<std::string> cf,
                    std::vector<ColumnFamilyOptions> options = {}) {
  const bool use_fixture_options = options.empty();
  std::vector<ColumnFamilyDescriptor> descriptors;
  names_.clear();
  size_t idx = 0;
  for (const auto& cf_name : cf) {
    const ColumnFamilyOptions& cf_opts =
        use_fixture_options ? column_family_options_ : options[idx];
    descriptors.push_back(ColumnFamilyDescriptor(cf_name, cf_opts));
    names_.push_back(cf_name);
    ++idx;
  }
  return DB::OpenForReadOnly(db_options_, dbname_, descriptors, &handles_,
                             &db_);
}
void AssertOpenReadOnly(std::vector<std::string> cf,
std::vector<ColumnFamilyOptions> options = {}) {
ASSERT_OK(OpenReadOnly(cf, options));
}
void Open(std::vector<std::string> cf,
std::vector<ColumnFamilyOptions> options = {}) {
ASSERT_OK(TryOpen(cf, options));
@ -850,6 +869,32 @@ TEST(ColumnFamilyTest, NewIteratorsTest) {
}
}
// Exercises the read-only open path with column families: opening a subset of
// the existing column families must succeed, while naming a dropped column
// family or leaving out "default" must fail.
TEST(ColumnFamilyTest, ReadOnlyDBTest) {
Open();
CreateColumnFamiliesAndReopen({"one", "two", "three", "four"});
// write a different value for "foo" through each of the handles 1..4, so
// that reads below can tell which column family they hit
ASSERT_OK(Put(1, "foo", "bla"));
ASSERT_OK(Put(2, "foo", "blabla"));
ASSERT_OK(Put(3, "foo", "blablabla"));
ASSERT_OK(Put(4, "foo", "blablablabla"));
// drop the column family behind handle 2 before closing the writable DB
DropColumnFamilies({2});
Close();
// open only a subset of column families
AssertOpenReadOnly({"default", "one", "four"});
// handle indices now follow the list passed above:
// 0 -> "default" (nothing written), 1 -> "one", 2 -> "four"
ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
ASSERT_EQ("bla", Get(1, "foo"));
ASSERT_EQ("blablablabla", Get(2, "foo"));
Close();
// can't open dropped column family
Status s = OpenReadOnly({"default", "one", "two"});
ASSERT_TRUE(!s.ok());
// Can't open without specifying default column family
s = OpenReadOnly({"one", "four"});
ASSERT_TRUE(!s.ok());
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -70,7 +70,7 @@
namespace rocksdb {
const std::string default_column_family_name("default");
const std::string kDefaultColumnFamilyName("default");
void DumpLeveldbBuildVersion(Logger * log);
@ -949,7 +949,7 @@ Status DBImpl::Recover(
}
}
Status s = versions_->Recover(column_families);
Status s = versions_->Recover(column_families, read_only);
if (options_.paranoid_checks && s.ok()) {
s = CheckConsistency();
}
@ -4498,7 +4498,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
ColumnFamilyOptions cf_options(options);
std::vector<ColumnFamilyDescriptor> column_families;
column_families.push_back(
ColumnFamilyDescriptor(default_column_family_name, cf_options));
ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
std::vector<ColumnFamilyHandle*> handles;
Status s = DB::Open(db_options, dbname, column_families, &handles, dbptr);
if (s.ok()) {
@ -4568,8 +4568,8 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
if (s.ok()) {
for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
delete cfd->InstallSuperVersion(new SuperVersion(), &impl->mutex_);
impl->alive_log_files_.push_back(impl->logfile_number_);
}
impl->alive_log_files_.push_back(impl->logfile_number_);
impl->DeleteObsoleteFiles();
impl->MaybeScheduleFlushOrCompaction();
impl->MaybeScheduleLogDBDeployStats();

View File

@ -94,12 +94,44 @@ Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,
ColumnFamilyOptions cf_options(options);
std::vector<ColumnFamilyDescriptor> column_families;
column_families.push_back(
ColumnFamilyDescriptor(default_column_family_name, cf_options));
ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
std::vector<ColumnFamilyHandle*> handles;
Status s =
DB::OpenForReadOnly(db_options, dbname, column_families, &handles, dbptr);
if (s.ok()) {
assert(handles.size() == 1);
// It is safe to delete the handle here since DBImpl always holds a
// reference to the default column family
delete handles[0];
}
return s;
}
Status DB::OpenForReadOnly(
const DBOptions& db_options, const std::string& dbname,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
bool error_if_log_file_exist) {
*dbptr = nullptr;
handles->clear();
DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname);
impl->mutex_.Lock();
Status s = impl->Recover(column_families, true /* read only */,
error_if_log_file_exist);
if (s.ok()) {
// set column family handles
for (auto cf : column_families) {
auto cfd =
impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
if (cfd == nullptr) {
s = Status::InvalidArgument("Column family not found: ", cf.name);
break;
}
handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
}
}
if (s.ok()) {
for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
delete cfd->InstallSuperVersion(new SuperVersion(), &impl->mutex_);
@ -109,9 +141,14 @@ Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,
if (s.ok()) {
*dbptr = impl;
} else {
for (auto h : *handles) {
delete h;
}
handles->clear();
delete impl;
}
return s;
}
} // namespace rocksdb

View File

@ -468,8 +468,7 @@ class DBTest {
const Options* options = nullptr) {
CreateColumnFamilies(cfs, options);
std::vector<std::string> cfs_plus_default = cfs;
cfs_plus_default.insert(cfs_plus_default.begin(),
default_column_family_name);
cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName);
ReopenWithColumnFamilies(cfs_plus_default, options);
}

View File

@ -1813,7 +1813,8 @@ void VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd, Builder* builder,
}
Status VersionSet::Recover(
const std::vector<ColumnFamilyDescriptor>& column_families) {
const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only) {
std::unordered_map<std::string, ColumnFamilyOptions> cf_name_to_options;
for (auto cf : column_families) {
cf_name_to_options.insert({cf.name, cf.options});
@ -1872,12 +1873,12 @@ Status VersionSet::Recover(
std::unordered_map<uint32_t, Builder*> builders;
// add default column family
auto default_cf_iter = cf_name_to_options.find(default_column_family_name);
auto default_cf_iter = cf_name_to_options.find(kDefaultColumnFamilyName);
if (default_cf_iter == cf_name_to_options.end()) {
return Status::InvalidArgument("Default column family not specified");
}
VersionEdit default_cf_edit;
default_cf_edit.AddColumnFamily(default_column_family_name);
default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName);
default_cf_edit.SetColumnFamily(0);
ColumnFamilyData* default_cfd =
CreateColumnFamily(default_cf_iter->second, &default_cf_edit);
@ -2034,11 +2035,16 @@ Status VersionSet::Recover(
}
// there were some column families in the MANIFEST that weren't specified
// in the argument
if (column_families_not_found.size() > 0) {
// in the argument. This is OK in read_only mode
if (read_only == false && column_families_not_found.size() > 0) {
std::string list_of_not_found;
for (auto cf : column_families_not_found) {
list_of_not_found += ", " + cf;
}
list_of_not_found = list_of_not_found.substr(2);
s = Status::InvalidArgument(
"Found unexpected column families. You have to specify all column "
"families when opening the DB");
"You have to open all column families. Column families not opened: %s",
list_of_not_found.c_str());
}
if (s.ok()) {
@ -2121,7 +2127,7 @@ Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
std::map<uint32_t, std::string> column_family_names;
// default column family is always implicitly there
column_family_names.insert({0, default_column_family_name});
column_family_names.insert({0, kDefaultColumnFamilyName});
VersionSet::LogReporter reporter;
reporter.status = &s;
log::Reader reader(std::move(file), &reporter, true /*checksum*/,
@ -2180,7 +2186,7 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
Status status;
std::vector<ColumnFamilyDescriptor> dummy;
ColumnFamilyDescriptor dummy_descriptor(default_column_family_name,
ColumnFamilyDescriptor dummy_descriptor(kDefaultColumnFamilyName,
ColumnFamilyOptions(*options));
dummy.push_back(dummy_descriptor);
status = versions.Recover(dummy);
@ -2264,7 +2270,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
// add default column family
VersionEdit default_cf_edit;
default_cf_edit.AddColumnFamily(default_column_family_name);
default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName);
default_cf_edit.SetColumnFamily(0);
ColumnFamilyData* default_cfd =
CreateColumnFamily(ColumnFamilyOptions(options), &default_cf_edit);

View File

@ -309,7 +309,10 @@ class VersionSet {
nullptr);
// Recover the last saved descriptor from persistent storage.
Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families);
// If read_only == true, Recover() will not complain if some column families
// are not opened
Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only = false);
// Reads a manifest file and returns a list of column families in
// column_families.

View File

@ -28,13 +28,13 @@ class ColumnFamilyHandle {
public:
virtual ~ColumnFamilyHandle() {}
};
extern const std::string default_column_family_name;
extern const std::string kDefaultColumnFamilyName;
struct ColumnFamilyDescriptor {
std::string name;
ColumnFamilyOptions options;
ColumnFamilyDescriptor()
: name(default_column_family_name), options(ColumnFamilyOptions()) {}
: name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
ColumnFamilyDescriptor(const std::string& name,
const ColumnFamilyOptions& options)
: name(name), options(options) {}
@ -104,18 +104,30 @@ class DB {
// that modify data, like put/delete, will return error.
// If the db is opened in read only mode, then no compactions
// will happen.
// TODO(icanadi): implement OpenForReadOnly that specifies column families.
// User can open DB in read-only mode even if not specifying all column
// families
static Status OpenForReadOnly(const Options& options,
const std::string& name, DB** dbptr,
bool error_if_log_file_exist = false);
// Open the database for read only with column families. When opening DB with
// read only, you can specify only a subset of column families in the
// database that should be opened. However, you always need to specify default
// column family. The default column family name is 'default' and it's stored
// in rocksdb::kDefaultColumnFamilyName
static Status OpenForReadOnly(
const DBOptions& db_options, const std::string& name,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
bool error_if_log_file_exist = false);
// Open DB with column families.
// db_options specify database specific options
// column_families is the vector of all column families you'd like to open,
// containing column family name and options. The default column family name
// is 'default'.
// column_families is the vector of all column families in the database,
// containing column family name and options. You need to open ALL column
// families in the database. To get the list of column families, you can use
// ListColumnFamilies(). Also, you can open only a subset of column families
// for read-only access.
// The default column family name is 'default' and it's stored
// in rocksdb::kDefaultColumnFamilyName.
// If everything is OK, handles will on return be the same size
// as column_families --- handles[i] will be a handle that you
// will use to operate on column family column_family[i]

View File

@ -1519,7 +1519,7 @@ class StressTest {
// DB doesn't exist
assert(existing_column_families.empty());
assert(column_family_names_.empty());
column_family_names_.push_back(default_column_family_name);
column_family_names_.push_back(kDefaultColumnFamilyName);
} else if (column_family_names_.empty()) {
// this is the first call to the function Open()
column_family_names_ = existing_column_families;
@ -1547,7 +1547,7 @@ class StressTest {
}
std::vector<ColumnFamilyDescriptor> cf_descriptors;
for (auto name : column_family_names_) {
if (name != default_column_family_name) {
if (name != kDefaultColumnFamilyName) {
new_column_family_name_ =
std::max(new_column_family_name_.load(), std::stoi(name) + 1);
}

View File

@ -1068,7 +1068,7 @@ Status ReduceDBLevelsCommand::GetOldNumOfLevels(Options& opt,
const InternalKeyComparator cmp(opt.comparator);
VersionSet versions(db_path_, &opt, soptions, tc.get());
std::vector<ColumnFamilyDescriptor> dummy;
ColumnFamilyDescriptor dummy_descriptor(default_column_family_name,
ColumnFamilyDescriptor dummy_descriptor(kDefaultColumnFamilyName,
ColumnFamilyOptions(opt));
dummy.push_back(dummy_descriptor);
// We rely the VersionSet::Recover to tell us the internal data structures