2014-09-13 01:23:58 +02:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#include "db/write_thread.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
Deprecate WriteOptions::timeout_hint_us
Summary:
In one of our recent meetings, we discussed deprecating features that are not being actively used. One of those features, at least within Facebook, is timeout_hint. The feature is really nicely implemented, but if nobody needs it, we should remove it from our code-base (until we get a valid use-case). Some arguments:
* Less code == better icache hit rate, smaller builds, simpler code
* The motivation for adding timeout_hint_us was to work-around RocksDB's stall issue. However, we're currently addressing the stall issue itself (see @sdong's recent work on stall write_rate), so we should never see sharp lock-ups in the future.
* Nobody is using the feature within Facebook's code-base. Googling for `timeout_hint_us` also doesn't yield any users.
Test Plan: make check
Reviewers: anthony, kradhakrishnan, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D41937
2015-07-14 09:35:48 +02:00
|
|
|
// Appends "w" to the tail of the writer queue "writers_" and then blocks on
// w->cv until one of the following holds:
//   1. w->done is set, i.e. some other writer has already finished the job
//      of "w" on its behalf.
//   2. "w" has become the writer at the front of "writers_".
// NOTE(review): presumably the caller holds the mutex that w->cv is bound
// to -- confirm against the Writer definition in write_thread.h.
void WriteThread::EnterWriteThread(WriteThread::Writer* w) {
  writers_.push_back(w);

  for (;;) {
    // Check "done" first: once it is set there is no need to look at the
    // queue at all.
    const bool finished = w->done;
    if (finished || writers_.front() == w) {
      return;
    }
    w->cv.Wait();
  }
}
|
|
|
|
|
|
|
|
// Removes writers from the front of "writers_" up to and including
// "last_writer" (or until the queue runs empty, whichever comes first).
// Every removed writer other than "w" is handed "status", marked done and
// has its condition variable signalled. Afterwards the writer that is now
// at the head of the queue, if any, is signalled.
void WriteThread::ExitWriteThread(WriteThread::Writer* w,
                                  WriteThread::Writer* last_writer,
                                  Status status) {
  bool reached_last = false;
  while (!reached_last && !writers_.empty()) {
    Writer* const head = writers_.front();
    writers_.pop_front();
    // Only a pointer comparison; "head" is not dereferenced for it.
    reached_last = (head == last_writer);

    if (head == w) {
      // "w" is the calling writer itself: nothing to publish or wake up.
      continue;
    }
    head->status = status;
    head->done = true;
    head->cv.Signal();
  }

  // Notify the new head of the write queue.
  if (writers_.empty()) {
    return;
  }
  writers_.front()->cv.Signal();
}
|
|
|
|
|
|
|
|
// This function will be called only when the first writer succeeds.
|
|
|
|
// All writers in the to-be-built batch group will be processed.
|
|
|
|
//
|
|
|
|
// REQUIRES: Writer list must be non-empty
|
|
|
|
// REQUIRES: First writer must have a non-nullptr batch
|
2015-05-16 00:52:51 +02:00
|
|
|
size_t WriteThread::BuildBatchGroup(
|
|
|
|
WriteThread::Writer** last_writer,
|
|
|
|
autovector<WriteBatch*>* write_batch_group) {
|
2014-09-13 01:23:58 +02:00
|
|
|
assert(!writers_.empty());
|
|
|
|
Writer* first = writers_.front();
|
|
|
|
assert(first->batch != nullptr);
|
|
|
|
|
|
|
|
size_t size = WriteBatchInternal::ByteSize(first->batch);
|
|
|
|
write_batch_group->push_back(first->batch);
|
|
|
|
|
|
|
|
// Allow the group to grow up to a maximum size, but if the
|
|
|
|
// original write is small, limit the growth so we do not slow
|
|
|
|
// down the small write too much.
|
|
|
|
size_t max_size = 1 << 20;
|
|
|
|
if (size <= (128<<10)) {
|
|
|
|
max_size = size + (128<<10);
|
|
|
|
}
|
|
|
|
|
|
|
|
*last_writer = first;
|
2015-05-29 23:36:35 +02:00
|
|
|
|
|
|
|
if (first->has_callback) {
|
|
|
|
// TODO(agiardullo:) Batching not currently supported as this write may
|
|
|
|
// fail if the callback function decides to abort this write.
|
2015-05-16 00:52:51 +02:00
|
|
|
return size;
|
2015-05-29 23:36:35 +02:00
|
|
|
}
|
|
|
|
|
2014-09-13 01:23:58 +02:00
|
|
|
std::deque<Writer*>::iterator iter = writers_.begin();
|
|
|
|
++iter; // Advance past "first"
|
|
|
|
for (; iter != writers_.end(); ++iter) {
|
|
|
|
Writer* w = *iter;
|
|
|
|
if (w->sync && !first->sync) {
|
|
|
|
// Do not include a sync write into a batch handled by a non-sync write.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!w->disableWAL && first->disableWAL) {
|
|
|
|
// Do not include a write that needs WAL into a batch that has
|
|
|
|
// WAL disabled.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-05-29 23:36:35 +02:00
|
|
|
if (w->has_callback) {
|
|
|
|
// Do not include writes which may be aborted if the callback does not
|
|
|
|
// succeed.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-09-13 01:23:58 +02:00
|
|
|
if (w->batch == nullptr) {
|
|
|
|
// Do not include those writes with nullptr batch. Those are not writes,
|
|
|
|
// those are something else. They want to be alone
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
size += WriteBatchInternal::ByteSize(w->batch);
|
|
|
|
if (size > max_size) {
|
|
|
|
// Do not make batch too big
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
write_batch_group->push_back(w->batch);
|
|
|
|
w->in_batch_group = true;
|
|
|
|
*last_writer = w;
|
|
|
|
}
|
2015-05-16 00:52:51 +02:00
|
|
|
return size;
|
2014-09-13 01:23:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|