2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2013-10-16 23:59:46 +02:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
2017-04-28 02:50:56 +02:00
|
|
|
// This source code is also licensed under the GPLv2 license found in the
|
|
|
|
// COPYING file in the root directory of this source tree.
|
2013-10-16 23:59:46 +02:00
|
|
|
//
|
2011-03-18 23:37:00 +01:00
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
2013-08-23 17:38:13 +02:00
|
|
|
#include "rocksdb/db.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
|
2013-07-23 23:42:27 +02:00
|
|
|
#include <memory>
|
2014-03-14 21:40:06 +01:00
|
|
|
#include "db/column_family.h"
|
2016-06-21 03:01:03 +02:00
|
|
|
#include "db/memtable.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
#include "db/write_batch_internal.h"
|
2013-08-23 17:38:13 +02:00
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/memtablerep.h"
|
2014-08-19 00:19:17 +02:00
|
|
|
#include "rocksdb/utilities/write_batch_with_index.h"
|
2016-06-21 03:01:03 +02:00
|
|
|
#include "rocksdb/write_buffer_manager.h"
|
2015-10-13 00:06:38 +02:00
|
|
|
#include "table/scoped_arena_iterator.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
#include "util/logging.h"
|
2015-03-20 01:29:37 +01:00
|
|
|
#include "util/string_util.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
#include "util/testharness.h"
|
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
namespace rocksdb {
|
2011-03-18 23:37:00 +01:00
|
|
|
|
|
|
|
static std::string PrintContents(WriteBatch* b) {
|
|
|
|
InternalKeyComparator cmp(BytewiseComparator());
|
2013-07-23 23:42:27 +02:00
|
|
|
auto factory = std::make_shared<SkipListFactory>();
|
2014-01-15 00:32:37 +01:00
|
|
|
Options options;
|
|
|
|
options.memtable_factory = factory;
|
2014-10-02 01:19:16 +02:00
|
|
|
ImmutableCFOptions ioptions(options);
|
2016-06-21 03:01:03 +02:00
|
|
|
WriteBufferManager wb(options.db_write_buffer_size);
|
2016-09-14 06:11:59 +02:00
|
|
|
MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
|
2017-06-02 21:08:01 +02:00
|
|
|
kMaxSequenceNumber, 0 /* column_family_id */);
|
2011-05-21 04:17:43 +02:00
|
|
|
mem->Ref();
|
2011-03-18 23:37:00 +01:00
|
|
|
std::string state;
|
2014-11-18 19:20:10 +01:00
|
|
|
ColumnFamilyMemTablesDefault cf_mems_default(mem);
|
support for concurrent adds to memtable
Summary:
This diff adds support for concurrent adds to the skiplist memtable
implementations. Memory allocation is made thread-safe by the addition of
a spinlock, with small per-core buffers to avoid contention. Concurrent
memtable writes are made via an additional method and don't impose a
performance overhead on the non-concurrent case, so parallelism can be
selected on a per-batch basis.
Write thread synchronization is an increasing bottleneck for higher levels
of concurrency, so this diff adds --enable_write_thread_adaptive_yield
(default off). This feature causes threads joining a write batch
group to spin for a short time (default 100 usec) using sched_yield,
rather than going to sleep on a mutex. If the timing of the yield calls
indicates that another thread has actually run during the yield then
spinning is avoided. This option improves performance for concurrent
situations even without parallel adds, although it has the potential to
increase CPU usage (and the heuristic adaptation is not yet mature).
Parallel writes are not currently compatible with
inplace updates, update callbacks, or delete filtering.
Enable it with --allow_concurrent_memtable_write (and
--enable_write_thread_adaptive_yield). Parallel memtable writes
are performance neutral when there is no actual parallelism, and in
my experiments (SSD server-class Linux and varying contention and key
sizes for fillrandom) they are always a performance win when there is
more than one thread.
Statistics are updated earlier in the write path, dropping the number
of DB mutex acquisitions from 2 to 1 for almost all cases.
This diff was motivated and inspired by Yahoo's cLSM work. It is more
conservative than cLSM: RocksDB's write batch group leader role is
preserved (along with all of the existing flush and write throttling
logic) and concurrent writers are blocked until all memtable insertions
have completed and the sequence number has been advanced, to preserve
linearizability.
My test config is "db_bench -benchmarks=fillrandom -threads=$T
-batch_size=1 -memtablerep=skip_list -value_size=100 --num=1000000/$T
-level0_slowdown_writes_trigger=9999 -level0_stop_writes_trigger=9999
-disable_auto_compactions --max_write_buffer_number=8
-max_background_flushes=8 --disable_wal --write_buffer_size=160000000
--block_size=16384 --allow_concurrent_memtable_write" on a two-socket
Xeon E5-2660 @ 2.2Ghz with lots of memory and an SSD hard drive. With 1
thread I get ~440Kops/sec. Peak performance for 1 socket (numactl
-N1) is slightly more than 1Mops/sec, at 16 threads. Peak performance
across both sockets happens at 30 threads, and is ~900Kops/sec, although
with fewer threads there is less performance loss when the system has
background work.
Test Plan:
1. concurrent stress tests for InlineSkipList and DynamicBloom
2. make clean; make check
3. make clean; DISABLE_JEMALLOC=1 make valgrind_check; valgrind db_bench
4. make clean; COMPILE_WITH_TSAN=1 make all check; db_bench
5. make clean; COMPILE_WITH_ASAN=1 make all check; db_bench
6. make clean; OPT=-DROCKSDB_LITE make check
7. verify no perf regressions when disabled
Reviewers: igor, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, IslamAbdelRahman, anthony, yhchiang, rven, sdong, guyg8, kradhakrishnan, dhruba
Differential Revision: https://reviews.facebook.net/D50589
2015-08-15 01:59:07 +02:00
|
|
|
Status s = WriteBatchInternal::InsertInto(b, &cf_mems_default, nullptr);
|
2012-03-09 01:23:21 +01:00
|
|
|
int count = 0;
|
2015-11-06 16:03:30 +01:00
|
|
|
int put_count = 0;
|
|
|
|
int delete_count = 0;
|
|
|
|
int single_delete_count = 0;
|
2016-08-16 17:16:04 +02:00
|
|
|
int delete_range_count = 0;
|
2015-11-06 16:03:30 +01:00
|
|
|
int merge_count = 0;
|
2016-09-12 23:14:40 +02:00
|
|
|
for (int i = 0; i < 2; ++i) {
|
|
|
|
Arena arena;
|
2016-11-19 23:14:35 +01:00
|
|
|
ScopedArenaIterator arena_iter_guard;
|
|
|
|
std::unique_ptr<InternalIterator> iter_guard;
|
|
|
|
InternalIterator* iter;
|
|
|
|
if (i == 0) {
|
|
|
|
iter = mem->NewIterator(ReadOptions(), &arena);
|
|
|
|
arena_iter_guard.set(iter);
|
|
|
|
} else {
|
|
|
|
iter = mem->NewRangeTombstoneIterator(ReadOptions());
|
|
|
|
iter_guard.reset(iter);
|
|
|
|
}
|
2016-11-21 21:07:09 +01:00
|
|
|
if (iter == nullptr) {
|
|
|
|
continue;
|
|
|
|
}
|
2016-09-12 23:14:40 +02:00
|
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
|
|
ParsedInternalKey ikey;
|
|
|
|
memset((void*)&ikey, 0, sizeof(ikey));
|
|
|
|
EXPECT_TRUE(ParseInternalKey(iter->key(), &ikey));
|
|
|
|
switch (ikey.type) {
|
|
|
|
case kTypeValue:
|
|
|
|
state.append("Put(");
|
|
|
|
state.append(ikey.user_key.ToString());
|
|
|
|
state.append(", ");
|
|
|
|
state.append(iter->value().ToString());
|
|
|
|
state.append(")");
|
|
|
|
count++;
|
|
|
|
put_count++;
|
|
|
|
break;
|
|
|
|
case kTypeDeletion:
|
|
|
|
state.append("Delete(");
|
|
|
|
state.append(ikey.user_key.ToString());
|
|
|
|
state.append(")");
|
|
|
|
count++;
|
|
|
|
delete_count++;
|
|
|
|
break;
|
|
|
|
case kTypeSingleDeletion:
|
|
|
|
state.append("SingleDelete(");
|
|
|
|
state.append(ikey.user_key.ToString());
|
|
|
|
state.append(")");
|
|
|
|
count++;
|
|
|
|
single_delete_count++;
|
|
|
|
break;
|
|
|
|
case kTypeRangeDeletion:
|
|
|
|
state.append("DeleteRange(");
|
|
|
|
state.append(ikey.user_key.ToString());
|
|
|
|
state.append(", ");
|
|
|
|
state.append(iter->value().ToString());
|
|
|
|
state.append(")");
|
|
|
|
count++;
|
|
|
|
delete_range_count++;
|
|
|
|
break;
|
|
|
|
case kTypeMerge:
|
|
|
|
state.append("Merge(");
|
|
|
|
state.append(ikey.user_key.ToString());
|
|
|
|
state.append(", ");
|
|
|
|
state.append(iter->value().ToString());
|
|
|
|
state.append(")");
|
|
|
|
count++;
|
|
|
|
merge_count++;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(false);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
state.append("@");
|
|
|
|
state.append(NumberToString(ikey.sequence));
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|
|
|
|
}
|
2015-11-06 16:03:30 +01:00
|
|
|
EXPECT_EQ(b->HasPut(), put_count > 0);
|
|
|
|
EXPECT_EQ(b->HasDelete(), delete_count > 0);
|
|
|
|
EXPECT_EQ(b->HasSingleDelete(), single_delete_count > 0);
|
2016-08-16 17:16:04 +02:00
|
|
|
EXPECT_EQ(b->HasDeleteRange(), delete_range_count > 0);
|
2015-11-06 16:03:30 +01:00
|
|
|
EXPECT_EQ(b->HasMerge(), merge_count > 0);
|
2011-03-18 23:37:00 +01:00
|
|
|
if (!s.ok()) {
|
2013-08-15 01:32:46 +02:00
|
|
|
state.append(s.ToString());
|
2012-03-09 01:23:21 +01:00
|
|
|
} else if (count != WriteBatchInternal::Count(b)) {
|
|
|
|
state.append("CountMismatch()");
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|
2013-12-02 06:23:44 +01:00
|
|
|
delete mem->Unref();
|
2011-03-18 23:37:00 +01:00
|
|
|
return state;
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
class WriteBatchTest : public testing::Test {};
|
2011-03-18 23:37:00 +01:00
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, Empty) {
|
2011-03-18 23:37:00 +01:00
|
|
|
WriteBatch batch;
|
|
|
|
ASSERT_EQ("", PrintContents(&batch));
|
|
|
|
ASSERT_EQ(0, WriteBatchInternal::Count(&batch));
|
2013-06-26 19:50:58 +02:00
|
|
|
ASSERT_EQ(0, batch.Count());
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, Multiple) {
|
2011-03-18 23:37:00 +01:00
|
|
|
WriteBatch batch;
|
|
|
|
batch.Put(Slice("foo"), Slice("bar"));
|
|
|
|
batch.Delete(Slice("box"));
|
2016-08-16 17:16:04 +02:00
|
|
|
batch.DeleteRange(Slice("bar"), Slice("foo"));
|
2011-03-18 23:37:00 +01:00
|
|
|
batch.Put(Slice("baz"), Slice("boo"));
|
|
|
|
WriteBatchInternal::SetSequence(&batch, 100);
|
2012-11-06 21:02:18 +01:00
|
|
|
ASSERT_EQ(100U, WriteBatchInternal::Sequence(&batch));
|
2016-08-16 17:16:04 +02:00
|
|
|
ASSERT_EQ(4, WriteBatchInternal::Count(&batch));
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Put(baz, boo)@103"
|
|
|
|
"Delete(box)@101"
|
2016-09-12 23:14:40 +02:00
|
|
|
"Put(foo, bar)@100"
|
|
|
|
"DeleteRange(bar, foo)@102",
|
2016-08-16 17:16:04 +02:00
|
|
|
PrintContents(&batch));
|
|
|
|
ASSERT_EQ(4, batch.Count());
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, Corruption) {
|
2011-03-18 23:37:00 +01:00
|
|
|
WriteBatch batch;
|
|
|
|
batch.Put(Slice("foo"), Slice("bar"));
|
|
|
|
batch.Delete(Slice("box"));
|
|
|
|
WriteBatchInternal::SetSequence(&batch, 200);
|
|
|
|
Slice contents = WriteBatchInternal::Contents(&batch);
|
|
|
|
WriteBatchInternal::SetContents(&batch,
|
|
|
|
Slice(contents.data(),contents.size()-1));
|
|
|
|
ASSERT_EQ("Put(foo, bar)@200"
|
2013-08-15 01:32:46 +02:00
|
|
|
"Corruption: bad WriteBatch Delete",
|
2011-03-18 23:37:00 +01:00
|
|
|
PrintContents(&batch));
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, Append) {
|
2012-03-09 01:23:21 +01:00
|
|
|
WriteBatch b1, b2;
|
|
|
|
WriteBatchInternal::SetSequence(&b1, 200);
|
|
|
|
WriteBatchInternal::SetSequence(&b2, 300);
|
|
|
|
WriteBatchInternal::Append(&b1, &b2);
|
|
|
|
ASSERT_EQ("",
|
|
|
|
PrintContents(&b1));
|
2013-06-26 19:50:58 +02:00
|
|
|
ASSERT_EQ(0, b1.Count());
|
2012-03-09 01:23:21 +01:00
|
|
|
b2.Put("a", "va");
|
|
|
|
WriteBatchInternal::Append(&b1, &b2);
|
|
|
|
ASSERT_EQ("Put(a, va)@200",
|
|
|
|
PrintContents(&b1));
|
2013-06-26 19:50:58 +02:00
|
|
|
ASSERT_EQ(1, b1.Count());
|
2012-03-09 01:23:21 +01:00
|
|
|
b2.Clear();
|
|
|
|
b2.Put("b", "vb");
|
|
|
|
WriteBatchInternal::Append(&b1, &b2);
|
|
|
|
ASSERT_EQ("Put(a, va)@200"
|
|
|
|
"Put(b, vb)@201",
|
|
|
|
PrintContents(&b1));
|
2013-06-26 19:50:58 +02:00
|
|
|
ASSERT_EQ(2, b1.Count());
|
2012-03-09 01:23:21 +01:00
|
|
|
b2.Delete("foo");
|
|
|
|
WriteBatchInternal::Append(&b1, &b2);
|
|
|
|
ASSERT_EQ("Put(a, va)@200"
|
|
|
|
"Put(b, vb)@202"
|
|
|
|
"Put(b, vb)@201"
|
|
|
|
"Delete(foo)@203",
|
|
|
|
PrintContents(&b1));
|
2013-06-26 19:50:58 +02:00
|
|
|
ASSERT_EQ(4, b1.Count());
|
Add facility to write only a portion of WriteBatch to WAL
Summary:
When constructing a write batch a client may now call MarkWalTerminationPoint() on that batch. No batch operations after this call will be added written to the WAL but will still be inserted into the Memtable. This facility is used to remove one of the three WriteImpl calls in 2PC transactions. This produces a ~1% perf improvement.
```
RocksDB - unoptimized 2pc, sync_binlog=1, disable_2pc=off
INFO 2016-08-31 14:30:38,814 [main]: REQUEST PHASE COMPLETED. 75000000 requests done in 2619 seconds. Requests/second = 28628
RocksDB - optimized 2pc , sync_binlog=1, disable_2pc=off
INFO 2016-08-31 16:26:59,442 [main]: REQUEST PHASE COMPLETED. 75000000 requests done in 2581 seconds. Requests/second = 29054
```
Test Plan: Two unit tests added.
Reviewers: sdong, yiwu, IslamAbdelRahman
Reviewed By: yiwu
Subscribers: hermanlee4, dhruba, andrewkr
Differential Revision: https://reviews.facebook.net/D64599
2016-10-07 20:31:26 +02:00
|
|
|
b2.Clear();
|
|
|
|
b2.Put("c", "cc");
|
|
|
|
b2.Put("d", "dd");
|
|
|
|
b2.MarkWalTerminationPoint();
|
|
|
|
b2.Put("e", "ee");
|
|
|
|
WriteBatchInternal::Append(&b1, &b2, /*wal only*/ true);
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Put(a, va)@200"
|
|
|
|
"Put(b, vb)@202"
|
|
|
|
"Put(b, vb)@201"
|
|
|
|
"Put(c, cc)@204"
|
|
|
|
"Put(d, dd)@205"
|
|
|
|
"Delete(foo)@203",
|
|
|
|
PrintContents(&b1));
|
|
|
|
ASSERT_EQ(6, b1.Count());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Put(c, cc)@0"
|
|
|
|
"Put(d, dd)@1"
|
|
|
|
"Put(e, ee)@2",
|
|
|
|
PrintContents(&b2));
|
|
|
|
ASSERT_EQ(3, b2.Count());
|
2012-03-09 01:23:21 +01:00
|
|
|
}
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
TEST_F(WriteBatchTest, SingleDeletion) {
|
|
|
|
WriteBatch batch;
|
|
|
|
WriteBatchInternal::SetSequence(&batch, 100);
|
|
|
|
ASSERT_EQ("", PrintContents(&batch));
|
|
|
|
ASSERT_EQ(0, batch.Count());
|
|
|
|
batch.Put("a", "va");
|
|
|
|
ASSERT_EQ("Put(a, va)@100", PrintContents(&batch));
|
|
|
|
ASSERT_EQ(1, batch.Count());
|
|
|
|
batch.SingleDelete("a");
|
|
|
|
ASSERT_EQ(
|
|
|
|
"SingleDelete(a)@101"
|
|
|
|
"Put(a, va)@100",
|
|
|
|
PrintContents(&batch));
|
|
|
|
ASSERT_EQ(2, batch.Count());
|
|
|
|
}
|
|
|
|
|
2013-08-22 03:27:48 +02:00
|
|
|
namespace {
|
|
|
|
struct TestHandler : public WriteBatch::Handler {
|
|
|
|
std::string seen;
|
2014-02-26 02:30:54 +01:00
|
|
|
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
|
2015-02-26 20:28:41 +01:00
|
|
|
const Slice& value) override {
|
2014-01-07 23:41:42 +01:00
|
|
|
if (column_family_id == 0) {
|
|
|
|
seen += "Put(" + key.ToString() + ", " + value.ToString() + ")";
|
|
|
|
} else {
|
2014-11-25 05:44:49 +01:00
|
|
|
seen += "PutCF(" + ToString(column_family_id) + ", " +
|
2014-01-07 23:41:42 +01:00
|
|
|
key.ToString() + ", " + value.ToString() + ")";
|
|
|
|
}
|
2014-02-26 02:30:54 +01:00
|
|
|
return Status::OK();
|
2013-08-22 03:27:48 +02:00
|
|
|
}
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
virtual Status DeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
if (column_family_id == 0) {
|
|
|
|
seen += "Delete(" + key.ToString() + ")";
|
|
|
|
} else {
|
|
|
|
seen += "DeleteCF(" + ToString(column_family_id) + ", " +
|
|
|
|
key.ToString() + ")";
|
|
|
|
}
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status SingleDeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
if (column_family_id == 0) {
|
|
|
|
seen += "SingleDelete(" + key.ToString() + ")";
|
|
|
|
} else {
|
|
|
|
seen += "SingleDeleteCF(" + ToString(column_family_id) + ", " +
|
|
|
|
key.ToString() + ")";
|
|
|
|
}
|
|
|
|
return Status::OK();
|
|
|
|
}
|
2016-08-16 17:16:04 +02:00
|
|
|
virtual Status DeleteRangeCF(uint32_t column_family_id,
|
|
|
|
const Slice& begin_key,
|
|
|
|
const Slice& end_key) override {
|
|
|
|
if (column_family_id == 0) {
|
|
|
|
seen += "DeleteRange(" + begin_key.ToString() + ", " +
|
|
|
|
end_key.ToString() + ")";
|
|
|
|
} else {
|
|
|
|
seen += "DeleteRangeCF(" + ToString(column_family_id) + ", " +
|
|
|
|
begin_key.ToString() + ", " + end_key.ToString() + ")";
|
|
|
|
}
|
|
|
|
return Status::OK();
|
|
|
|
}
|
2014-02-26 02:30:54 +01:00
|
|
|
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
|
2015-02-26 20:28:41 +01:00
|
|
|
const Slice& value) override {
|
2014-01-07 23:41:42 +01:00
|
|
|
if (column_family_id == 0) {
|
|
|
|
seen += "Merge(" + key.ToString() + ", " + value.ToString() + ")";
|
|
|
|
} else {
|
2014-11-25 05:44:49 +01:00
|
|
|
seen += "MergeCF(" + ToString(column_family_id) + ", " +
|
2014-01-07 23:41:42 +01:00
|
|
|
key.ToString() + ", " + value.ToString() + ")";
|
|
|
|
}
|
2014-02-26 02:30:54 +01:00
|
|
|
return Status::OK();
|
2013-08-22 03:27:48 +02:00
|
|
|
}
|
2015-02-26 20:28:41 +01:00
|
|
|
virtual void LogData(const Slice& blob) override {
|
2013-08-22 03:27:48 +02:00
|
|
|
seen += "LogData(" + blob.ToString() + ")";
|
|
|
|
}
|
Modification of WriteBatch to support two phase commit
Summary: Adds three new WriteBatch data types: Prepare(xid), Commit(xid), Rollback(xid). Prepare(xid) should precede the (single) operation to which is applies. There can obviously be multiple Prepare(xid) markers. There should only be one Rollback(xid) or Commit(xid) marker yet not both. None of this logic is currently enforced and will most likely be implemented further up such as in the memtableinserter. All three markers are similar to PutLogData in that they are writebatch meta-data, ie stored but not counted. All three markers differ from PutLogData in that they will actually be written to disk. As for WriteBatchWithIndex, Prepare, Commit, Rollback are all implemented just as PutLogData and none are tested just as PutLogData.
Test Plan: single unit test in write_batch_test.
Reviewers: hermanlee4, sdong, anthony
Subscribers: leveldb, dhruba, vasilep, andrewkr
Differential Revision: https://reviews.facebook.net/D57867
2016-04-08 08:35:51 +02:00
|
|
|
virtual Status MarkBeginPrepare() override {
|
|
|
|
seen += "MarkBeginPrepare()";
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status MarkEndPrepare(const Slice& xid) override {
|
|
|
|
seen += "MarkEndPrepare(" + xid.ToString() + ")";
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status MarkCommit(const Slice& xid) override {
|
|
|
|
seen += "MarkCommit(" + xid.ToString() + ")";
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status MarkRollback(const Slice& xid) override {
|
|
|
|
seen += "MarkRollback(" + xid.ToString() + ")";
|
|
|
|
return Status::OK();
|
|
|
|
}
|
2013-08-22 03:27:48 +02:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
TEST_F(WriteBatchTest, PutNotImplemented) {
|
2014-12-04 21:01:55 +01:00
|
|
|
WriteBatch batch;
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.Put(Slice("k1"), Slice("v1"));
|
2014-12-04 21:01:55 +01:00
|
|
|
ASSERT_EQ(1, batch.Count());
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
ASSERT_EQ("Put(k1, v1)@0", PrintContents(&batch));
|
2014-12-04 21:01:55 +01:00
|
|
|
|
|
|
|
WriteBatch::Handler handler;
|
|
|
|
ASSERT_OK(batch.Iterate(&handler));
|
|
|
|
}
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
TEST_F(WriteBatchTest, DeleteNotImplemented) {
|
2014-12-04 21:01:55 +01:00
|
|
|
WriteBatch batch;
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.Delete(Slice("k2"));
|
2014-12-04 21:01:55 +01:00
|
|
|
ASSERT_EQ(1, batch.Count());
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
ASSERT_EQ("Delete(k2)@0", PrintContents(&batch));
|
2014-12-04 21:01:55 +01:00
|
|
|
|
|
|
|
WriteBatch::Handler handler;
|
|
|
|
ASSERT_OK(batch.Iterate(&handler));
|
|
|
|
}
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
TEST_F(WriteBatchTest, SingleDeleteNotImplemented) {
|
2014-12-04 21:01:55 +01:00
|
|
|
WriteBatch batch;
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.SingleDelete(Slice("k2"));
|
2014-12-04 21:01:55 +01:00
|
|
|
ASSERT_EQ(1, batch.Count());
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
ASSERT_EQ("SingleDelete(k2)@0", PrintContents(&batch));
|
|
|
|
|
|
|
|
WriteBatch::Handler handler;
|
|
|
|
ASSERT_OK(batch.Iterate(&handler));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(WriteBatchTest, MergeNotImplemented) {
|
|
|
|
WriteBatch batch;
|
|
|
|
batch.Merge(Slice("foo"), Slice("bar"));
|
|
|
|
ASSERT_EQ(1, batch.Count());
|
|
|
|
ASSERT_EQ("Merge(foo, bar)@0", PrintContents(&batch));
|
2014-12-04 21:01:55 +01:00
|
|
|
|
|
|
|
WriteBatch::Handler handler;
|
|
|
|
ASSERT_OK(batch.Iterate(&handler));
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, Blob) {
|
2013-08-15 01:32:46 +02:00
|
|
|
WriteBatch batch;
|
|
|
|
batch.Put(Slice("k1"), Slice("v1"));
|
|
|
|
batch.Put(Slice("k2"), Slice("v2"));
|
|
|
|
batch.Put(Slice("k3"), Slice("v3"));
|
|
|
|
batch.PutLogData(Slice("blob1"));
|
|
|
|
batch.Delete(Slice("k2"));
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.SingleDelete(Slice("k3"));
|
2013-08-15 01:32:46 +02:00
|
|
|
batch.PutLogData(Slice("blob2"));
|
|
|
|
batch.Merge(Slice("foo"), Slice("bar"));
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
ASSERT_EQ(6, batch.Count());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Merge(foo, bar)@5"
|
|
|
|
"Put(k1, v1)@0"
|
|
|
|
"Delete(k2)@3"
|
|
|
|
"Put(k2, v2)@1"
|
|
|
|
"SingleDelete(k3)@4"
|
|
|
|
"Put(k3, v3)@2",
|
|
|
|
PrintContents(&batch));
|
2013-08-15 01:32:46 +02:00
|
|
|
|
2013-08-22 03:27:48 +02:00
|
|
|
TestHandler handler;
|
|
|
|
batch.Iterate(&handler);
|
|
|
|
ASSERT_EQ(
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
"Put(k1, v1)"
|
|
|
|
"Put(k2, v2)"
|
|
|
|
"Put(k3, v3)"
|
|
|
|
"LogData(blob1)"
|
|
|
|
"Delete(k2)"
|
|
|
|
"SingleDelete(k3)"
|
|
|
|
"LogData(blob2)"
|
|
|
|
"Merge(foo, bar)",
|
|
|
|
handler.seen);
|
2013-08-22 03:27:48 +02:00
|
|
|
}
|
|
|
|
|
Modification of WriteBatch to support two phase commit
Summary: Adds three new WriteBatch data types: Prepare(xid), Commit(xid), Rollback(xid). Prepare(xid) should precede the (single) operation to which is applies. There can obviously be multiple Prepare(xid) markers. There should only be one Rollback(xid) or Commit(xid) marker yet not both. None of this logic is currently enforced and will most likely be implemented further up such as in the memtableinserter. All three markers are similar to PutLogData in that they are writebatch meta-data, ie stored but not counted. All three markers differ from PutLogData in that they will actually be written to disk. As for WriteBatchWithIndex, Prepare, Commit, Rollback are all implemented just as PutLogData and none are tested just as PutLogData.
Test Plan: single unit test in write_batch_test.
Reviewers: hermanlee4, sdong, anthony
Subscribers: leveldb, dhruba, vasilep, andrewkr
Differential Revision: https://reviews.facebook.net/D57867
2016-04-08 08:35:51 +02:00
|
|
|
TEST_F(WriteBatchTest, PrepareCommit) {
|
|
|
|
WriteBatch batch;
|
|
|
|
WriteBatchInternal::InsertNoop(&batch);
|
|
|
|
batch.Put(Slice("k1"), Slice("v1"));
|
|
|
|
batch.Put(Slice("k2"), Slice("v2"));
|
|
|
|
batch.SetSavePoint();
|
|
|
|
WriteBatchInternal::MarkEndPrepare(&batch, Slice("xid1"));
|
|
|
|
Status s = batch.RollbackToSavePoint();
|
|
|
|
ASSERT_EQ(s, Status::NotFound());
|
|
|
|
WriteBatchInternal::MarkCommit(&batch, Slice("xid1"));
|
|
|
|
WriteBatchInternal::MarkRollback(&batch, Slice("xid1"));
|
|
|
|
ASSERT_EQ(2, batch.Count());
|
|
|
|
|
|
|
|
TestHandler handler;
|
|
|
|
batch.Iterate(&handler);
|
|
|
|
ASSERT_EQ(
|
|
|
|
"MarkBeginPrepare()"
|
|
|
|
"Put(k1, v1)"
|
|
|
|
"Put(k2, v2)"
|
|
|
|
"MarkEndPrepare(xid1)"
|
|
|
|
"MarkCommit(xid1)"
|
|
|
|
"MarkRollback(xid1)",
|
|
|
|
handler.seen);
|
|
|
|
}
|
|
|
|
|
2016-02-01 20:03:28 +01:00
|
|
|
// It requires more than 30GB of memory to run the test. With single memory
|
|
|
|
// allocation of more than 30GB.
|
|
|
|
// Not all platform can run it. Also it runs a long time. So disable it.
|
|
|
|
TEST_F(WriteBatchTest, DISABLED_ManyUpdates) {
|
|
|
|
// Insert key and value of 3GB and push total batch size to 12GB.
|
2016-02-02 01:07:53 +01:00
|
|
|
static const size_t kKeyValueSize = 4u;
|
|
|
|
static const uint32_t kNumUpdates = 3 << 30;
|
2016-02-01 20:03:28 +01:00
|
|
|
std::string raw(kKeyValueSize, 'A');
|
|
|
|
WriteBatch batch(kNumUpdates * (4 + kKeyValueSize * 2) + 1024u);
|
|
|
|
char c = 'A';
|
|
|
|
for (uint32_t i = 0; i < kNumUpdates; i++) {
|
|
|
|
if (c > 'Z') {
|
|
|
|
c = 'A';
|
|
|
|
}
|
|
|
|
raw[0] = c;
|
|
|
|
raw[raw.length() - 1] = c;
|
|
|
|
c++;
|
|
|
|
batch.Put(raw, raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT_EQ(kNumUpdates, batch.Count());
|
|
|
|
|
|
|
|
struct NoopHandler : public WriteBatch::Handler {
|
|
|
|
uint32_t num_seen = 0;
|
|
|
|
char expected_char = 'A';
|
|
|
|
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
|
|
|
|
const Slice& value) override {
|
|
|
|
EXPECT_EQ(kKeyValueSize, key.size());
|
|
|
|
EXPECT_EQ(kKeyValueSize, value.size());
|
|
|
|
EXPECT_EQ(expected_char, key[0]);
|
|
|
|
EXPECT_EQ(expected_char, value[0]);
|
|
|
|
EXPECT_EQ(expected_char, key[kKeyValueSize - 1]);
|
|
|
|
EXPECT_EQ(expected_char, value[kKeyValueSize - 1]);
|
|
|
|
expected_char++;
|
|
|
|
if (expected_char > 'Z') {
|
|
|
|
expected_char = 'A';
|
|
|
|
}
|
|
|
|
++num_seen;
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status DeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
EXPECT_TRUE(false);
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status SingleDeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
EXPECT_TRUE(false);
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
|
|
|
|
const Slice& value) override {
|
|
|
|
EXPECT_TRUE(false);
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual void LogData(const Slice& blob) override { EXPECT_TRUE(false); }
|
|
|
|
virtual bool Continue() override { return num_seen < kNumUpdates; }
|
|
|
|
} handler;
|
|
|
|
|
|
|
|
batch.Iterate(&handler);
|
|
|
|
ASSERT_EQ(kNumUpdates, handler.num_seen);
|
|
|
|
}
|
|
|
|
|
|
|
|
// The test requires more than 18GB memory to run it, with single memory
|
|
|
|
// allocation of more than 12GB. Not all the platform can run it. So disable it.
|
|
|
|
TEST_F(WriteBatchTest, DISABLED_LargeKeyValue) {
|
|
|
|
// Insert key and value of 3GB and push total batch size to 12GB.
|
2016-02-02 01:07:53 +01:00
|
|
|
static const size_t kKeyValueSize = 3221225472u;
|
2016-02-01 20:03:28 +01:00
|
|
|
std::string raw(kKeyValueSize, 'A');
|
2017-03-31 01:47:19 +02:00
|
|
|
WriteBatch batch(size_t(12884901888ull + 1024u));
|
2016-02-01 20:03:28 +01:00
|
|
|
for (char i = 0; i < 2; i++) {
|
|
|
|
raw[0] = 'A' + i;
|
|
|
|
raw[raw.length() - 1] = 'A' - i;
|
|
|
|
batch.Put(raw, raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT_EQ(2, batch.Count());
|
|
|
|
|
|
|
|
struct NoopHandler : public WriteBatch::Handler {
|
|
|
|
int num_seen = 0;
|
|
|
|
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
|
|
|
|
const Slice& value) override {
|
|
|
|
EXPECT_EQ(kKeyValueSize, key.size());
|
|
|
|
EXPECT_EQ(kKeyValueSize, value.size());
|
|
|
|
EXPECT_EQ('A' + num_seen, key[0]);
|
|
|
|
EXPECT_EQ('A' + num_seen, value[0]);
|
|
|
|
EXPECT_EQ('A' - num_seen, key[kKeyValueSize - 1]);
|
|
|
|
EXPECT_EQ('A' - num_seen, value[kKeyValueSize - 1]);
|
|
|
|
++num_seen;
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status DeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
EXPECT_TRUE(false);
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status SingleDeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
EXPECT_TRUE(false);
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
|
|
|
|
const Slice& value) override {
|
|
|
|
EXPECT_TRUE(false);
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
virtual void LogData(const Slice& blob) override { EXPECT_TRUE(false); }
|
|
|
|
virtual bool Continue() override { return num_seen < 2; }
|
|
|
|
} handler;
|
|
|
|
|
|
|
|
batch.Iterate(&handler);
|
|
|
|
ASSERT_EQ(2, handler.num_seen);
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, Continue) {
|
2013-08-22 03:27:48 +02:00
|
|
|
WriteBatch batch;
|
|
|
|
|
|
|
|
struct Handler : public TestHandler {
|
|
|
|
int num_seen = 0;
|
2014-02-26 02:30:54 +01:00
|
|
|
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
|
2015-02-26 20:28:41 +01:00
|
|
|
const Slice& value) override {
|
2013-08-22 03:27:48 +02:00
|
|
|
++num_seen;
|
2014-02-26 02:30:54 +01:00
|
|
|
return TestHandler::PutCF(column_family_id, key, value);
|
2013-08-15 01:32:46 +02:00
|
|
|
}
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
virtual Status DeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
++num_seen;
|
|
|
|
return TestHandler::DeleteCF(column_family_id, key);
|
|
|
|
}
|
|
|
|
virtual Status SingleDeleteCF(uint32_t column_family_id,
|
|
|
|
const Slice& key) override {
|
|
|
|
++num_seen;
|
|
|
|
return TestHandler::SingleDeleteCF(column_family_id, key);
|
|
|
|
}
|
2014-02-26 02:30:54 +01:00
|
|
|
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
|
2015-02-26 20:28:41 +01:00
|
|
|
const Slice& value) override {
|
2013-08-22 03:27:48 +02:00
|
|
|
++num_seen;
|
2014-02-26 02:30:54 +01:00
|
|
|
return TestHandler::MergeCF(column_family_id, key, value);
|
2013-08-15 01:32:46 +02:00
|
|
|
}
|
2015-02-26 20:28:41 +01:00
|
|
|
virtual void LogData(const Slice& blob) override {
|
2013-08-22 03:27:48 +02:00
|
|
|
++num_seen;
|
|
|
|
TestHandler::LogData(blob);
|
2013-08-15 01:32:46 +02:00
|
|
|
}
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
virtual bool Continue() override { return num_seen < 5; }
|
2013-08-15 01:32:46 +02:00
|
|
|
} handler;
|
2013-08-22 03:27:48 +02:00
|
|
|
|
|
|
|
batch.Put(Slice("k1"), Slice("v1"));
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.Put(Slice("k2"), Slice("v2"));
|
2013-08-22 03:27:48 +02:00
|
|
|
batch.PutLogData(Slice("blob1"));
|
|
|
|
batch.Delete(Slice("k1"));
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.SingleDelete(Slice("k2"));
|
2013-08-22 03:27:48 +02:00
|
|
|
batch.PutLogData(Slice("blob2"));
|
|
|
|
batch.Merge(Slice("foo"), Slice("bar"));
|
2013-08-15 01:32:46 +02:00
|
|
|
batch.Iterate(&handler);
|
|
|
|
ASSERT_EQ(
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
"Put(k1, v1)"
|
|
|
|
"Put(k2, v2)"
|
|
|
|
"LogData(blob1)"
|
|
|
|
"Delete(k1)"
|
|
|
|
"SingleDelete(k2)",
|
|
|
|
handler.seen);
|
2013-08-15 01:32:46 +02:00
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, PutGatherSlices) {
|
2013-11-07 21:37:58 +01:00
|
|
|
WriteBatch batch;
|
|
|
|
batch.Put(Slice("foo"), Slice("bar"));
|
|
|
|
|
|
|
|
{
|
|
|
|
// Try a write where the key is one slice but the value is two
|
|
|
|
Slice key_slice("baz");
|
|
|
|
Slice value_slices[2] = { Slice("header"), Slice("payload") };
|
|
|
|
batch.Put(SliceParts(&key_slice, 1),
|
|
|
|
SliceParts(value_slices, 2));
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
// One where the key is composite but the value is a single slice
|
|
|
|
Slice key_slices[3] = { Slice("key"), Slice("part2"), Slice("part3") };
|
|
|
|
Slice value_slice("value");
|
|
|
|
batch.Put(SliceParts(key_slices, 3),
|
|
|
|
SliceParts(&value_slice, 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
WriteBatchInternal::SetSequence(&batch, 100);
|
|
|
|
ASSERT_EQ("Put(baz, headerpayload)@101"
|
|
|
|
"Put(foo, bar)@100"
|
|
|
|
"Put(keypart2part3, value)@102",
|
|
|
|
PrintContents(&batch));
|
|
|
|
ASSERT_EQ(3, batch.Count());
|
|
|
|
}
|
|
|
|
|
2014-03-14 21:40:06 +01:00
|
|
|
namespace {
|
|
|
|
class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl {
|
|
|
|
public:
|
2014-04-25 21:21:34 +02:00
|
|
|
explicit ColumnFamilyHandleImplDummy(int id)
|
2014-03-14 21:40:06 +01:00
|
|
|
: ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {}
|
|
|
|
uint32_t GetID() const override { return id_; }
|
2016-07-26 00:06:11 +02:00
|
|
|
const Comparator* GetComparator() const override {
|
2014-09-22 20:37:35 +02:00
|
|
|
return BytewiseComparator();
|
|
|
|
}
|
2014-03-14 21:40:06 +01:00
|
|
|
|
|
|
|
private:
|
|
|
|
uint32_t id_;
|
|
|
|
};
|
|
|
|
} // namespace anonymous
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, ColumnFamiliesBatchTest) {
|
2014-01-07 23:41:42 +01:00
|
|
|
WriteBatch batch;
|
2014-03-14 21:40:06 +01:00
|
|
|
ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8);
|
|
|
|
batch.Put(&zero, Slice("foo"), Slice("bar"));
|
|
|
|
batch.Put(&two, Slice("twofoo"), Slice("bar2"));
|
|
|
|
batch.Put(&eight, Slice("eightfoo"), Slice("bar8"));
|
|
|
|
batch.Delete(&eight, Slice("eightfoo"));
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.SingleDelete(&two, Slice("twofoo"));
|
2016-08-16 17:16:04 +02:00
|
|
|
batch.DeleteRange(&two, Slice("3foo"), Slice("4foo"));
|
2014-03-14 21:40:06 +01:00
|
|
|
batch.Merge(&three, Slice("threethree"), Slice("3three"));
|
|
|
|
batch.Put(&zero, Slice("foo"), Slice("bar"));
|
2014-01-07 23:41:42 +01:00
|
|
|
batch.Merge(Slice("omom"), Slice("nom"));
|
|
|
|
|
|
|
|
TestHandler handler;
|
|
|
|
batch.Iterate(&handler);
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Put(foo, bar)"
|
|
|
|
"PutCF(2, twofoo, bar2)"
|
|
|
|
"PutCF(8, eightfoo, bar8)"
|
|
|
|
"DeleteCF(8, eightfoo)"
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
"SingleDeleteCF(2, twofoo)"
|
2016-08-16 17:16:04 +02:00
|
|
|
"DeleteRangeCF(2, 3foo, 4foo)"
|
2014-01-07 23:41:42 +01:00
|
|
|
"MergeCF(3, threethree, 3three)"
|
|
|
|
"Put(foo, bar)"
|
|
|
|
"Merge(omom, nom)",
|
|
|
|
handler.seen);
|
|
|
|
}
|
|
|
|
|
2015-07-14 19:35:35 +02:00
|
|
|
#ifndef ROCKSDB_LITE
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(WriteBatchTest, ColumnFamiliesBatchWithIndexTest) {
|
2014-09-22 20:37:35 +02:00
|
|
|
WriteBatchWithIndex batch;
|
2014-08-19 00:19:17 +02:00
|
|
|
ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8);
|
|
|
|
batch.Put(&zero, Slice("foo"), Slice("bar"));
|
|
|
|
batch.Put(&two, Slice("twofoo"), Slice("bar2"));
|
|
|
|
batch.Put(&eight, Slice("eightfoo"), Slice("bar8"));
|
|
|
|
batch.Delete(&eight, Slice("eightfoo"));
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
batch.SingleDelete(&two, Slice("twofoo"));
|
2016-08-16 17:16:04 +02:00
|
|
|
batch.DeleteRange(&two, Slice("twofoo"), Slice("threefoo"));
|
2014-08-19 00:19:17 +02:00
|
|
|
batch.Merge(&three, Slice("threethree"), Slice("3three"));
|
|
|
|
batch.Put(&zero, Slice("foo"), Slice("bar"));
|
|
|
|
batch.Merge(Slice("omom"), Slice("nom"));
|
|
|
|
|
|
|
|
std::unique_ptr<WBWIIterator> iter;
|
|
|
|
|
|
|
|
iter.reset(batch.NewIterator(&eight));
|
|
|
|
iter->Seek("eightfoo");
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("eightfoo", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("bar8", iter->Entry().value.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kDeleteRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("eightfoo", iter->Entry().key.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(!iter->Valid());
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
iter.reset(batch.NewIterator(&two));
|
|
|
|
iter->Seek("twofoo");
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("twofoo", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("bar2", iter->Entry().value.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kSingleDeleteRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("twofoo", iter->Entry().key.ToString());
|
|
|
|
|
2016-08-16 17:16:04 +02:00
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kDeleteRangeRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("twofoo", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("threefoo", iter->Entry().value.ToString());
|
|
|
|
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(!iter->Valid());
|
|
|
|
|
2014-08-19 00:19:17 +02:00
|
|
|
iter.reset(batch.NewIterator());
|
|
|
|
iter->Seek("gggg");
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kMergeRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("omom", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("nom", iter->Entry().value.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(!iter->Valid());
|
|
|
|
|
|
|
|
iter.reset(batch.NewIterator(&zero));
|
|
|
|
iter->Seek("foo");
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("foo", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("bar", iter->Entry().value.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("foo", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("bar", iter->Entry().value.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(iter->Valid());
|
|
|
|
ASSERT_EQ(WriteType::kMergeRecord, iter->Entry().type);
|
|
|
|
ASSERT_EQ("omom", iter->Entry().key.ToString());
|
|
|
|
ASSERT_EQ("nom", iter->Entry().value.ToString());
|
|
|
|
|
|
|
|
iter->Next();
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
ASSERT_TRUE(!iter->Valid());
|
|
|
|
|
|
|
|
TestHandler handler;
|
|
|
|
batch.GetWriteBatch()->Iterate(&handler);
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Put(foo, bar)"
|
|
|
|
"PutCF(2, twofoo, bar2)"
|
|
|
|
"PutCF(8, eightfoo, bar8)"
|
|
|
|
"DeleteCF(8, eightfoo)"
|
Support for SingleDelete()
Summary:
This patch fixes #7460559. It introduces SingleDelete as a new database
operation. This operation can be used to delete keys that were never
overwritten (no put following another put of the same key). If an overwritten
key is single deleted the behavior is undefined. Single deletion of a
non-existent key has no effect but multiple consecutive single deletions are
not allowed (see limitations).
In contrast to the conventional Delete() operation, the deletion entry is
removed along with the value when the two are lined up in a compaction. Note:
The semantics are similar to @igor's prototype that allowed to have this
behavior on the granularity of a column family (
https://reviews.facebook.net/D42093 ). This new patch, however, is more
aggressive when it comes to removing tombstones: It removes the SingleDelete
together with the value whenever there is no snapshot between them while the
older patch only did this when the sequence number of the deletion was older
than the earliest snapshot.
Most of the complex additions are in the Compaction Iterator, all other changes
should be relatively straightforward. The patch also includes basic support for
single deletions in db_stress and db_bench.
Limitations:
- Not compatible with cuckoo hash tables
- Single deletions cannot be used in combination with merges and normal
deletions on the same key (other keys are not affected by this)
- Consecutive single deletions are currently not allowed (and older version of
this patch supported this so it could be resurrected if needed)
Test Plan: make all check
Reviewers: yhchiang, sdong, rven, anthony, yoshinorim, igor
Reviewed By: igor
Subscribers: maykov, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D43179
2015-09-17 20:42:56 +02:00
|
|
|
"SingleDeleteCF(2, twofoo)"
|
2016-08-16 17:16:04 +02:00
|
|
|
"DeleteRangeCF(2, twofoo, threefoo)"
|
2014-08-19 00:19:17 +02:00
|
|
|
"MergeCF(3, threethree, 3three)"
|
|
|
|
"Put(foo, bar)"
|
|
|
|
"Merge(omom, nom)",
|
|
|
|
handler.seen);
|
|
|
|
}
|
2015-07-14 19:35:35 +02:00
|
|
|
#endif // !ROCKSDB_LITE
|
2014-08-19 00:19:17 +02:00
|
|
|
|
2015-07-11 05:15:45 +02:00
|
|
|
TEST_F(WriteBatchTest, SavePointTest) {
|
|
|
|
Status s;
|
|
|
|
WriteBatch batch;
|
|
|
|
batch.SetSavePoint();
|
|
|
|
|
|
|
|
batch.Put("A", "a");
|
|
|
|
batch.Put("B", "b");
|
|
|
|
batch.SetSavePoint();
|
|
|
|
|
|
|
|
batch.Put("C", "c");
|
|
|
|
batch.Delete("A");
|
|
|
|
batch.SetSavePoint();
|
|
|
|
batch.SetSavePoint();
|
|
|
|
|
|
|
|
ASSERT_OK(batch.RollbackToSavePoint());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Delete(A)@3"
|
|
|
|
"Put(A, a)@0"
|
|
|
|
"Put(B, b)@1"
|
|
|
|
"Put(C, c)@2",
|
|
|
|
PrintContents(&batch));
|
|
|
|
|
|
|
|
ASSERT_OK(batch.RollbackToSavePoint());
|
|
|
|
ASSERT_OK(batch.RollbackToSavePoint());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Put(A, a)@0"
|
|
|
|
"Put(B, b)@1",
|
|
|
|
PrintContents(&batch));
|
|
|
|
|
|
|
|
batch.Delete("A");
|
|
|
|
batch.Put("B", "bb");
|
|
|
|
|
|
|
|
ASSERT_OK(batch.RollbackToSavePoint());
|
|
|
|
ASSERT_EQ("", PrintContents(&batch));
|
|
|
|
|
|
|
|
s = batch.RollbackToSavePoint();
|
|
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
|
|
ASSERT_EQ("", PrintContents(&batch));
|
|
|
|
|
|
|
|
batch.Put("D", "d");
|
|
|
|
batch.Delete("A");
|
|
|
|
|
|
|
|
batch.SetSavePoint();
|
|
|
|
|
|
|
|
batch.Put("A", "aaa");
|
|
|
|
|
|
|
|
ASSERT_OK(batch.RollbackToSavePoint());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Delete(A)@1"
|
|
|
|
"Put(D, d)@0",
|
|
|
|
PrintContents(&batch));
|
|
|
|
|
|
|
|
batch.SetSavePoint();
|
|
|
|
|
|
|
|
batch.Put("D", "d");
|
|
|
|
batch.Delete("A");
|
|
|
|
|
|
|
|
ASSERT_OK(batch.RollbackToSavePoint());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Delete(A)@1"
|
|
|
|
"Put(D, d)@0",
|
|
|
|
PrintContents(&batch));
|
|
|
|
|
|
|
|
s = batch.RollbackToSavePoint();
|
|
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
|
|
ASSERT_EQ(
|
|
|
|
"Delete(A)@1"
|
|
|
|
"Put(D, d)@0",
|
|
|
|
PrintContents(&batch));
|
|
|
|
|
|
|
|
WriteBatch batch2;
|
|
|
|
|
|
|
|
s = batch2.RollbackToSavePoint();
|
|
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
|
|
ASSERT_EQ("", PrintContents(&batch2));
|
|
|
|
|
|
|
|
batch2.Delete("A");
|
|
|
|
batch2.SetSavePoint();
|
|
|
|
|
|
|
|
s = batch2.RollbackToSavePoint();
|
|
|
|
ASSERT_OK(s);
|
|
|
|
ASSERT_EQ("Delete(A)@0", PrintContents(&batch2));
|
|
|
|
|
|
|
|
batch2.Clear();
|
|
|
|
ASSERT_EQ("", PrintContents(&batch2));
|
|
|
|
|
|
|
|
batch2.SetSavePoint();
|
|
|
|
|
|
|
|
batch2.Delete("B");
|
|
|
|
ASSERT_EQ("Delete(B)@0", PrintContents(&batch2));
|
|
|
|
|
|
|
|
batch2.SetSavePoint();
|
|
|
|
s = batch2.RollbackToSavePoint();
|
|
|
|
ASSERT_OK(s);
|
|
|
|
ASSERT_EQ("Delete(B)@0", PrintContents(&batch2));
|
|
|
|
|
|
|
|
s = batch2.RollbackToSavePoint();
|
|
|
|
ASSERT_OK(s);
|
|
|
|
ASSERT_EQ("", PrintContents(&batch2));
|
|
|
|
|
|
|
|
s = batch2.RollbackToSavePoint();
|
|
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
|
|
ASSERT_EQ("", PrintContents(&batch2));
|
2017-05-03 19:54:07 +02:00
|
|
|
|
|
|
|
WriteBatch batch3;
|
|
|
|
|
|
|
|
s = batch3.PopSavePoint();
|
|
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
|
|
ASSERT_EQ("", PrintContents(&batch3));
|
|
|
|
|
|
|
|
batch3.SetSavePoint();
|
|
|
|
batch3.Delete("A");
|
|
|
|
|
|
|
|
s = batch3.PopSavePoint();
|
|
|
|
ASSERT_OK(s);
|
|
|
|
ASSERT_EQ("Delete(A)@0", PrintContents(&batch3));
|
2015-07-11 05:15:45 +02:00
|
|
|
}
|
|
|
|
|
2017-04-11 00:38:34 +02:00
|
|
|
TEST_F(WriteBatchTest, MemoryLimitTest) {
|
|
|
|
Status s;
|
|
|
|
// The header size is 12 bytes. The two Puts take 8 bytes which gives total
|
|
|
|
// of 12 + 8 * 2 = 28 bytes.
|
|
|
|
WriteBatch batch(0, 28);
|
|
|
|
|
|
|
|
ASSERT_OK(batch.Put("a", "...."));
|
|
|
|
ASSERT_OK(batch.Put("b", "...."));
|
|
|
|
s = batch.Put("c", "....");
|
|
|
|
ASSERT_TRUE(s.IsMemoryLimit());
|
|
|
|
}
|
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
} // namespace rocksdb
|
2011-03-18 23:37:00 +01:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2015-03-17 22:08:00 +01:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|