Implement PositionedAppend for PosixWritableFile
Summary: This patch clarifies the contract of PositionedAppend with some unit tests and also implements it for PosixWritableFile. (Tasks: 14524071) Closes https://github.com/facebook/rocksdb/pull/1514 Differential Revision: D4204907 Pulled By: maysamyabandeh fbshipit-source-id: 06eabd2
This commit is contained in:
parent
3f62215210
commit
9d60151b04
@ -518,10 +518,31 @@ class WritableFile {
|
|||||||
return c_DefaultPageSize;
|
return c_DefaultPageSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Append data to the end of the file
|
||||||
|
// Note: A WritableFile object must support either Append or
|
||||||
|
// PositionedAppend, so the users cannot mix the two.
|
||||||
virtual Status Append(const Slice& data) = 0;
|
virtual Status Append(const Slice& data) = 0;
|
||||||
|
|
||||||
// Positioned write for unbuffered access default forward
|
// PositionedAppend data to the specified offset. The new EOF after append
|
||||||
// to simple append as most of the tests are buffered by default
|
// must be larger than the previous EOF. This is to be used when writes are
|
||||||
|
// not backed by OS buffers and hence has to always start from the start of
|
||||||
|
// the sector. The implementation thus needs to also rewrite the last
|
||||||
|
// partial sector.
|
||||||
|
// Note: PositionedAppend does not guarantee moving the file offset after the
|
||||||
|
// write. A WritableFile object must support either Append or
|
||||||
|
// PositionedAppend, so the users cannot mix the two.
|
||||||
|
//
|
||||||
|
// PositionedAppend() can only happen on the page/sector boundaries. For that
|
||||||
|
// reason, if the last write was an incomplete sector we still need to rewind
|
||||||
|
// back to the nearest sector/page and rewrite the portion of it with whatever
|
||||||
|
// we need to add. We need to keep where we stop writing.
|
||||||
|
//
|
||||||
|
// PositionedAppend() can only write whole sectors. For that reason we have to
|
||||||
|
// pad with zeros for the last write and trim the file when closing according
|
||||||
|
// to the position we keep in the previous step.
|
||||||
|
//
|
||||||
|
// PositionedAppend() requires aligned buffer to be passed in. The alignment
|
||||||
|
// required is queried via GetRequiredBufferAlignment()
|
||||||
virtual Status PositionedAppend(const Slice& /* data */, uint64_t /* offset */) {
|
virtual Status PositionedAppend(const Slice& /* data */, uint64_t /* offset */) {
|
||||||
return Status::NotSupported();
|
return Status::NotSupported();
|
||||||
}
|
}
|
||||||
|
@ -272,7 +272,14 @@ class PosixEnv : public Env {
|
|||||||
#ifdef OS_MACOSX
|
#ifdef OS_MACOSX
|
||||||
int flags = O_WRONLY | O_APPEND | O_TRUNC | O_CREAT;
|
int flags = O_WRONLY | O_APPEND | O_TRUNC | O_CREAT;
|
||||||
#else
|
#else
|
||||||
int flags = O_WRONLY | O_APPEND | O_TRUNC | O_CREAT | O_DIRECT;
|
// Note: we should avoid O_APPEND here due to the following bug:
|
||||||
|
// POSIX requires that opening a file with the O_APPEND flag should
|
||||||
|
// have no effect on the location at which pwrite() writes data.
|
||||||
|
// However, on Linux, if a file is opened with O_APPEND, pwrite()
|
||||||
|
// appends data to the end of the file, regardless of the value of
|
||||||
|
// offset.
|
||||||
|
// More info here: https://linux.die.net/man/2/pwrite
|
||||||
|
int flags = O_WRONLY | O_TRUNC | O_CREAT | O_DIRECT;
|
||||||
#endif
|
#endif
|
||||||
TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags);
|
TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags);
|
||||||
fd = open(fname.c_str(), flags, 0644);
|
fd = open(fname.c_str(), flags, 0644);
|
||||||
|
@ -640,6 +640,39 @@ class IoctlFriendlyTmpdir {
|
|||||||
std::string dir_;
|
std::string dir_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Checks that PositionedAppend() honours the offset it is given: a second
// write that starts inside the range covered by the first one must overwrite
// the tail of the first write instead of being appended after it.
TEST_F(EnvPosixTest, PositionedAppend) {
  unique_ptr<WritableFile> writer;

  // Request direct (non-mmap) writes for the file under test.
  EnvOptions direct_opts;
  direct_opts.use_mmap_writes = false;
  direct_opts.use_direct_writes = true;

  IoctlFriendlyTmpdir tmp_dir;
  const std::string file_path = tmp_dir.name() + "/f";
  ASSERT_OK(env_->NewWritableFile(file_path, &writer, direct_opts));

  const size_t kBlockSize = 512;
  const size_t kPageSize = 4096;
  const size_t kDataSize = kPageSize;

  // First write: one page of 'a' starting at offset 0.
  auto aligned_buf = NewAligned(kDataSize, 'a');
  Slice page_of_a(aligned_buf.get(), kDataSize);
  ASSERT_OK(writer->PositionedAppend(page_of_a, 0U));

  // Second write: one page of 'b' starting one block into the file, which
  // overlaps everything the first write put after the first block.
  aligned_buf = NewAligned(kDataSize, 'b');
  Slice page_of_b(aligned_buf.get(), kDataSize);
  ASSERT_OK(writer->PositionedAppend(page_of_b, kBlockSize));
  ASSERT_OK(writer->Close());

  // Expected layout: kBlockSize bytes of 'a' followed by kPageSize bytes of
  // 'b'. Read the file back and check its size and the a/b boundary.
  unique_ptr<SequentialFile> reader;
  ASSERT_OK(env_->NewSequentialFile(file_path, &reader, direct_opts));
  char read_buf[kPageSize * 2];
  Slice contents;
  ASSERT_OK(reader->Read(sizeof(read_buf), &contents, read_buf));
  ASSERT_EQ(kPageSize + kBlockSize, contents.size());
  ASSERT_EQ('a', contents[kBlockSize - 1]);
  ASSERT_EQ('b', contents[kBlockSize]);
}
|
||||||
|
|
||||||
// Only works in linux platforms
|
// Only works in linux platforms
|
||||||
TEST_F(EnvPosixTest, RandomAccessUniqueID) {
|
TEST_F(EnvPosixTest, RandomAccessUniqueID) {
|
||||||
|
@ -692,6 +692,26 @@ Status PosixWritableFile::Append(const Slice& data) {
|
|||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes all of `data` into the file starting at absolute position `offset`,
// using pwrite() so the descriptor's own file position is not consulted.
//
// Short writes are continued from where they stopped and EINTR is retried.
// Any other pwrite() failure is returned as an IOError for filename_.
// On success, filesize_ is set to the position just past the last byte
// written, and Status::OK() is returned.
Status PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
  // pwrite() takes a signed off_t; the requested offset must be representable.
  assert(offset <=
         static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
  const char* src = data.data();
  size_t left = data.size();
  while (left != 0) {
    ssize_t done = pwrite(fd_, src, left, static_cast<off_t>(offset));
    if (done < 0) {
      if (errno == EINTR) {
        // Interrupted before anything was written: retry the same range.
        continue;
      }
      return IOError(filename_, errno);
    }
    // Advance past the bytes that were actually written (may be a short write).
    left -= done;
    offset += done;
    src += done;
  }
  // `offset` was advanced by every byte written in the loop above, so it
  // already equals (starting offset + data.size()). Adding data.size() again
  // would count the payload twice.
  filesize_ = offset;
  return Status::OK();
}
|
||||||
|
|
||||||
Status PosixWritableFile::Close() {
|
Status PosixWritableFile::Close() {
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
|
@ -125,6 +125,7 @@ class PosixWritableFile : public WritableFile {
|
|||||||
virtual Status Truncate(uint64_t size) override { return Status::OK(); }
|
virtual Status Truncate(uint64_t size) override { return Status::OK(); }
|
||||||
virtual Status Close() override;
|
virtual Status Close() override;
|
||||||
virtual Status Append(const Slice& data) override;
|
virtual Status Append(const Slice& data) override;
|
||||||
|
virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
|
||||||
virtual Status Flush() override;
|
virtual Status Flush() override;
|
||||||
virtual Status Sync() override;
|
virtual Status Sync() override;
|
||||||
virtual Status Fsync() override;
|
virtual Status Fsync() override;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user