Env function for bulk metadata retrieval

Summary:
Added this new function, which returns filename, size, and modified
timestamp for each file in the provided directory. The default implementation
retrieves the metadata sequentially using existing functions. In the next diff
I'll make HdfsEnv override this function to use libhdfs's bulk get function.

This won't work on windows due to the path separator.

Test Plan:
new unit test

  $ ./env_test --gtest_filter=EnvPosixTest.ConsistentChildrenMetadata

Reviewers: yhchiang, sdong

Reviewed By: sdong

Subscribers: IslamAbdelRahman, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D53781
This commit is contained in:
Andrew Kryczka 2016-02-09 14:54:32 -08:00
parent 4a8cbf4e31
commit 59b3ee658f
4 changed files with 90 additions and 3 deletions

View File

@ -103,6 +103,14 @@ struct EnvOptions {
class Env {
public:
struct FileAttributes {
// File name
std::string name;
// Size of file in bytes
uint64_t size_bytes;
};
Env() : thread_status_updater_(nullptr) {}
virtual ~Env();
@ -177,6 +185,15 @@ class Env {
virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) = 0;
// Store in *result the attributes of the children of the specified directory.
// In case the implementation lists the directory prior to iterating the files
// and files are concurrently deleted, the deleted files will be omitted from
// result.
// The name attributes are relative to "dir".
// Original contents of *results are dropped.
virtual Status GetChildrenFileAttributes(const std::string& dir,
std::vector<FileAttributes>* result);
// Delete the named file.
virtual Status DeleteFile(const std::string& fname) = 0;
@ -789,6 +806,10 @@ class EnvWrapper : public Env {
std::vector<std::string>* r) override {
return target_->GetChildren(dir, r);
}
Status GetChildrenFileAttributes(
const std::string& dir, std::vector<FileAttributes>* result) override {
return target_->GetChildrenFileAttributes(dir, result);
}
Status DeleteFile(const std::string& f) override {
return target_->DeleteFile(f);
}

View File

@ -1415,6 +1415,11 @@ class WinEnv : public Env {
return status;
}
virtual Status Env::GetChildrenFileMetadata(
const std::string& dir, std::vector<FileMetadata>* result) override {
return Status::NotSupported("Not supported in WinEnv");
}
virtual Status CreateDir(const std::string& name) override {
Status result;
@ -1723,9 +1728,8 @@ class WinEnv : public Env {
virtual Status GetHostName(char* name, uint64_t len) override {
Status s;
DWORD nSize =
static_cast<DWORD>(std::min<uint64_t>(len,
std::numeric_limits<DWORD>::max()));
DWORD nSize = static_cast<DWORD>(
std::min<uint64_t>(len, std::numeric_limits<DWORD>::max()));
if (!::GetComputerNameA(name, &nSize)) {
auto lastError = GetLastError();

View File

@ -38,6 +38,32 @@ Status Env::ReuseWritableFile(const std::string& fname,
return NewWritableFile(fname, result, options);
}
Status Env::GetChildrenFileAttributes(const std::string& dir,
std::vector<FileAttributes>* result) {
assert(result != nullptr);
std::vector<std::string> child_fnames;
Status s = GetChildren(dir, &child_fnames);
if (!s.ok()) {
return s;
}
result->resize(child_fnames.size());
size_t result_size = 0;
for (size_t i = 0; i < child_fnames.size(); ++i) {
const std::string path = dir + "/" + child_fnames[i];
if (!(s = GetFileSize(path, &(*result)[result_size].size_bytes)).ok()) {
if (FileExists(path).IsNotFound()) {
// The file may have been deleted since we listed the directory
continue;
}
return s;
}
(*result)[result_size].name = std::move(child_fnames[i]);
result_size++;
}
result->resize(result_size);
return Status::OK();
}
SequentialFile::~SequentialFile() {
}

View File

@ -935,6 +935,42 @@ TEST_F(EnvPosixTest, Preallocation) {
ASSERT_EQ(last_allocated_block, 7UL);
}
// Test that the two ways to get children file attributes (in bulk or
// individually) behave consistently.
TEST_F(EnvPosixTest, ConsistentChildrenAttributes) {
const EnvOptions soptions;
const int kNumChildren = 10;
std::string data;
for (int i = 0; i < kNumChildren; ++i) {
std::ostringstream oss;
oss << test::TmpDir() << "/testfile_" << i;
const std::string path = oss.str();
unique_ptr<WritableFile> file;
ASSERT_OK(env_->NewWritableFile(path, &file, soptions));
file->Append(data);
data.append("test");
}
std::vector<Env::FileAttributes> file_attrs;
ASSERT_OK(env_->GetChildrenFileAttributes(test::TmpDir(), &file_attrs));
for (int i = 0; i < kNumChildren; ++i) {
std::ostringstream oss;
oss << "testfile_" << i;
const std::string name = oss.str();
const std::string path = test::TmpDir() + "/" + name;
auto file_attrs_iter = std::find_if(
file_attrs.begin(), file_attrs.end(),
[&name](const Env::FileAttributes& fm) { return fm.name == name; });
ASSERT_TRUE(file_attrs_iter != file_attrs.end());
uint64_t size;
ASSERT_OK(env_->GetFileSize(path, &size));
ASSERT_EQ(size, 4 * i);
ASSERT_EQ(size, file_attrs_iter->size_bytes);
}
}
// Test that all WritableFileWrapper forwards all calls to WritableFile.
TEST_F(EnvPosixTest, WritableFileWrapper) {
class Base : public WritableFile {