Implement PositionedAppend for PosixWritableFile
Summary: This patch clarifies the contract of PositionedAppend with some unit tests and also implements it for PosixWritableFile. (Tasks: 14524071) Closes https://github.com/facebook/rocksdb/pull/1514 Differential Revision: D4204907 Pulled By: maysamyabandeh fbshipit-source-id: 06eabd2
This commit is contained in:
parent
3f62215210
commit
9d60151b04
@ -518,10 +518,31 @@ class WritableFile {
|
||||
return c_DefaultPageSize;
|
||||
}
|
||||
|
||||
// Append data to the end of the file
|
||||
// Note: A WriteabelFile object must support either Append or
|
||||
// PositionedAppend, so the users cannot mix the two.
|
||||
virtual Status Append(const Slice& data) = 0;
|
||||
|
||||
// Positioned write for unbuffered access default forward
|
||||
// to simple append as most of the tests are buffered by default
|
||||
// PositionedAppend data to the specified offset. The new EOF after append
|
||||
// must be larger than the previous EOF. This is to be used when writes are
|
||||
// not backed by OS buffers and hence has to always start from the start of
|
||||
// the sector. The implementation thus needs to also rewrite the last
|
||||
// partial sector.
|
||||
// Note: PositionAppend does not guarantee moving the file offset after the
|
||||
// write. A WriteabelFile object must support either Append or
|
||||
// PositionedAppend, so the users cannot mix the two.
|
||||
//
|
||||
// PositionedAppend() can only happen on the page/sector boundaries. For that
|
||||
// reason, if the last write was an incomplete sector we still need to rewind
|
||||
// back to the nearest sector/page and rewrite the portion of it with whatever
|
||||
// we need to add. We need to keep where we stop writing.
|
||||
//
|
||||
// PositionedAppend() can only write whole sectors. For that reason we have to
|
||||
// pad with zeros for the last write and trim the file when closing according
|
||||
// to the position we keep in the previous step.
|
||||
//
|
||||
// PositionedAppend() requires aligned buffer to be passed in. The alignment
|
||||
// required is queried via GetRequiredBufferAlignment()
|
||||
virtual Status PositionedAppend(const Slice& /* data */, uint64_t /* offset */) {
|
||||
return Status::NotSupported();
|
||||
}
|
||||
|
@ -272,7 +272,14 @@ class PosixEnv : public Env {
|
||||
#ifdef OS_MACOSX
|
||||
int flags = O_WRONLY | O_APPEND | O_TRUNC | O_CREAT;
|
||||
#else
|
||||
int flags = O_WRONLY | O_APPEND | O_TRUNC | O_CREAT | O_DIRECT;
|
||||
// Note: we should avoid O_APPEND here due to ta the following bug:
|
||||
// POSIX requires that opening a file with the O_APPEND flag should
|
||||
// have no affect on the location at which pwrite() writes data.
|
||||
// However, on Linux, if a file is opened with O_APPEND, pwrite()
|
||||
// appends data to the end of the file, regardless of the value of
|
||||
// offset.
|
||||
// More info here: https://linux.die.net/man/2/pwrite
|
||||
int flags = O_WRONLY | O_TRUNC | O_CREAT | O_DIRECT;
|
||||
#endif
|
||||
TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags);
|
||||
fd = open(fname.c_str(), flags, 0644);
|
||||
|
@ -640,6 +640,39 @@ class IoctlFriendlyTmpdir {
|
||||
std::string dir_;
|
||||
};
|
||||
|
||||
TEST_F(EnvPosixTest, PositionedAppend) {
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
|
||||
EnvOptions options;
|
||||
options.use_direct_writes = true;
|
||||
options.use_mmap_writes = false;
|
||||
IoctlFriendlyTmpdir ift;
|
||||
ASSERT_OK(env_->NewWritableFile(ift.name() + "/f", &writable_file, options));
|
||||
|
||||
const size_t kBlockSize = 512;
|
||||
const size_t kPageSize = 4096;
|
||||
const size_t kDataSize = kPageSize;
|
||||
// Write a page worth of 'a'
|
||||
auto data_ptr = NewAligned(kDataSize, 'a');
|
||||
Slice data_a(data_ptr.get(), kDataSize);
|
||||
ASSERT_OK(writable_file->PositionedAppend(data_a, 0U));
|
||||
// Write a page worth of 'b' right after the first sector
|
||||
data_ptr = NewAligned(kDataSize, 'b');
|
||||
Slice data_b(data_ptr.get(), kDataSize);
|
||||
ASSERT_OK(writable_file->PositionedAppend(data_b, kBlockSize));
|
||||
ASSERT_OK(writable_file->Close());
|
||||
// The file now has 1 sector worth of a followed by a page worth of b
|
||||
|
||||
// Verify the above
|
||||
unique_ptr<SequentialFile> seq_file;
|
||||
ASSERT_OK(env_->NewSequentialFile(ift.name() + "/f", &seq_file, options));
|
||||
char scratch[kPageSize * 2];
|
||||
Slice result;
|
||||
ASSERT_OK(seq_file->Read(sizeof(scratch), &result, scratch));
|
||||
ASSERT_EQ(kPageSize + kBlockSize, result.size());
|
||||
ASSERT_EQ('a', result[kBlockSize - 1]);
|
||||
ASSERT_EQ('b', result[kBlockSize]);
|
||||
}
|
||||
|
||||
// Only works in linux platforms
|
||||
TEST_F(EnvPosixTest, RandomAccessUniqueID) {
|
||||
|
@ -692,6 +692,26 @@ Status PosixWritableFile::Append(const Slice& data) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
|
||||
assert(offset <= std::numeric_limits<off_t>::max());
|
||||
const char* src = data.data();
|
||||
size_t left = data.size();
|
||||
while (left != 0) {
|
||||
ssize_t done = pwrite(fd_, src, left, static_cast<off_t>(offset));
|
||||
if (done < 0) {
|
||||
if (errno == EINTR) {
|
||||
continue;
|
||||
}
|
||||
return IOError(filename_, errno);
|
||||
}
|
||||
left -= done;
|
||||
offset += done;
|
||||
src += done;
|
||||
}
|
||||
filesize_ = offset + data.size();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PosixWritableFile::Close() {
|
||||
Status s;
|
||||
|
||||
|
@ -125,6 +125,7 @@ class PosixWritableFile : public WritableFile {
|
||||
virtual Status Truncate(uint64_t size) override { return Status::OK(); }
|
||||
virtual Status Close() override;
|
||||
virtual Status Append(const Slice& data) override;
|
||||
virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
|
||||
virtual Status Flush() override;
|
||||
virtual Status Sync() override;
|
||||
virtual Status Fsync() override;
|
||||
|
Loading…
Reference in New Issue
Block a user