Env function for bulk metadata retrieval
Summary: Added this new function, which returns filename, size, and modified timestamp for each file in the provided directory. The default implementation retrieves the metadata sequentially using existing functions. In the next diff I'll make HdfsEnv override this function to use libhdfs's bulk get function. The default implementation won't work on Windows due to the path separator. Test Plan: new unit test $ ./env_test --gtest_filter=EnvPosixTest.ConsistentChildrenAttributes Reviewers: yhchiang, sdong Reviewed By: sdong Subscribers: IslamAbdelRahman, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53781
This commit is contained in:
parent
4a8cbf4e31
commit
59b3ee658f
@ -103,6 +103,14 @@ struct EnvOptions {
|
||||
|
||||
class Env {
|
||||
public:
|
||||
// Per-file metadata record: one entry describes a single file by its name
// and its length.
struct FileAttributes {
  // Name of the file.
  std::string name;

  // Length of the file, measured in bytes.
  uint64_t size_bytes;
};
|
||||
|
||||
Env() : thread_status_updater_(nullptr) {}
|
||||
|
||||
virtual ~Env();
|
||||
@ -177,6 +185,15 @@ class Env {
|
||||
virtual Status GetChildren(const std::string& dir,
|
||||
std::vector<std::string>* result) = 0;
|
||||
|
||||
// Store in *result the attributes of the children of the specified directory.
|
||||
// In case the implementation lists the directory prior to iterating the files
|
||||
// and files are concurrently deleted, the deleted files will be omitted from
|
||||
// result.
|
||||
// The name attributes are relative to "dir".
|
||||
// Original contents of *result are dropped.
|
||||
virtual Status GetChildrenFileAttributes(const std::string& dir,
|
||||
std::vector<FileAttributes>* result);
|
||||
|
||||
// Delete the named file.
|
||||
virtual Status DeleteFile(const std::string& fname) = 0;
|
||||
|
||||
@ -789,6 +806,10 @@ class EnvWrapper : public Env {
|
||||
std::vector<std::string>* r) override {
|
||||
return target_->GetChildren(dir, r);
|
||||
}
|
||||
// Delegates the bulk attribute listing of "dir" to target_ and returns its
// status unchanged; *result is filled in by target_.
Status GetChildrenFileAttributes(const std::string& dir,
                                 std::vector<FileAttributes>* result) override {
  return target_->GetChildrenFileAttributes(dir, result);
}
|
||||
// Delegates deletion of file "f" to target_ and returns its status unchanged.
Status DeleteFile(const std::string& f) override {
  return target_->DeleteFile(f);
}
|
||||
|
@ -1415,6 +1415,11 @@ class WinEnv : public Env {
|
||||
return status;
|
||||
}
|
||||
|
||||
// Bulk retrieval of child file attributes is not provided by WinEnv; this
// override always reports NotSupported and leaves *result untouched.
//
// The name and parameter type must match Env::GetChildrenFileAttributes
// exactly (std::vector<FileAttributes>*), otherwise `override` has nothing to
// bind to. A class-qualified name (`Env::...`) is also not permitted for a
// member declared inside WinEnv's own class body.
virtual Status GetChildrenFileAttributes(
    const std::string& dir, std::vector<FileAttributes>* result) override {
  return Status::NotSupported("Not supported in WinEnv");
}
|
||||
|
||||
virtual Status CreateDir(const std::string& name) override {
|
||||
Status result;
|
||||
|
||||
@ -1723,9 +1728,8 @@ class WinEnv : public Env {
|
||||
|
||||
virtual Status GetHostName(char* name, uint64_t len) override {
|
||||
Status s;
|
||||
DWORD nSize =
|
||||
static_cast<DWORD>(std::min<uint64_t>(len,
|
||||
std::numeric_limits<DWORD>::max()));
|
||||
DWORD nSize = static_cast<DWORD>(
|
||||
std::min<uint64_t>(len, std::numeric_limits<DWORD>::max()));
|
||||
|
||||
if (!::GetComputerNameA(name, &nSize)) {
|
||||
auto lastError = GetLastError();
|
||||
|
26
util/env.cc
26
util/env.cc
@ -38,6 +38,32 @@ Status Env::ReuseWritableFile(const std::string& fname,
|
||||
return NewWritableFile(fname, result, options);
|
||||
}
|
||||
|
||||
// Default implementation: lists "dir" with GetChildren() and then queries the
// size of every listed child one at a time with GetFileSize().
//
// A child whose size lookup fails and which FileExists() reports as NotFound
// is skipped. Any other size-lookup failure is returned to the caller
// immediately. On success *result holds exactly one entry per child that was
// still present, with names relative to "dir".
Status Env::GetChildrenFileAttributes(const std::string& dir,
                                      std::vector<FileAttributes>* result) {
  assert(result != nullptr);

  std::vector<std::string> names;
  Status status = GetChildren(dir, &names);
  if (!status.ok()) {
    return status;
  }

  // Reserve one slot per listed child; slots are filled front to back and the
  // vector is trimmed afterwards to the number actually kept.
  result->resize(names.size());
  size_t kept = 0;
  for (std::string& child : names) {
    const std::string full_path = dir + "/" + child;
    FileAttributes& slot = (*result)[kept];

    status = GetFileSize(full_path, &slot.size_bytes);
    if (status.ok()) {
      slot.name = std::move(child);
      ++kept;
      continue;
    }

    if (!FileExists(full_path).IsNotFound()) {
      return status;
    }
    // Otherwise the file may have been deleted since the directory was
    // listed: leave `kept` unchanged so the next child reuses this slot.
  }

  result->resize(kept);
  return Status::OK();
}
|
||||
|
||||
// Out-of-line destructor definition; it performs no work of its own.
SequentialFile::~SequentialFile() = default;
|
||||
|
||||
|
@ -935,6 +935,42 @@ TEST_F(EnvPosixTest, Preallocation) {
|
||||
ASSERT_EQ(last_allocated_block, 7UL);
|
||||
}
|
||||
|
||||
// Test that the two ways to get children file attributes (in bulk or
|
||||
// individually) behave consistently.
|
||||
TEST_F(EnvPosixTest, ConsistentChildrenAttributes) {
|
||||
const EnvOptions soptions;
|
||||
const int kNumChildren = 10;
|
||||
|
||||
std::string data;
|
||||
for (int i = 0; i < kNumChildren; ++i) {
|
||||
std::ostringstream oss;
|
||||
oss << test::TmpDir() << "/testfile_" << i;
|
||||
const std::string path = oss.str();
|
||||
unique_ptr<WritableFile> file;
|
||||
ASSERT_OK(env_->NewWritableFile(path, &file, soptions));
|
||||
file->Append(data);
|
||||
data.append("test");
|
||||
}
|
||||
|
||||
std::vector<Env::FileAttributes> file_attrs;
|
||||
ASSERT_OK(env_->GetChildrenFileAttributes(test::TmpDir(), &file_attrs));
|
||||
for (int i = 0; i < kNumChildren; ++i) {
|
||||
std::ostringstream oss;
|
||||
oss << "testfile_" << i;
|
||||
const std::string name = oss.str();
|
||||
const std::string path = test::TmpDir() + "/" + name;
|
||||
|
||||
auto file_attrs_iter = std::find_if(
|
||||
file_attrs.begin(), file_attrs.end(),
|
||||
[&name](const Env::FileAttributes& fm) { return fm.name == name; });
|
||||
ASSERT_TRUE(file_attrs_iter != file_attrs.end());
|
||||
uint64_t size;
|
||||
ASSERT_OK(env_->GetFileSize(path, &size));
|
||||
ASSERT_EQ(size, 4 * i);
|
||||
ASSERT_EQ(size, file_attrs_iter->size_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
// Test that all WritableFileWrapper forwards all calls to WritableFile.
|
||||
TEST_F(EnvPosixTest, WritableFileWrapper) {
|
||||
class Base : public WritableFile {
|
||||
|
Loading…
x
Reference in New Issue
Block a user