Added methods to write small ints to bit streams.

Summary: Added BitStreamPutInt() and BitStreamGetInt(), which take a stream of chars and respectively write and read unsigned integers of arbitrary bit sizes (up to 64 bits) at arbitrary bit positions in that stream. There are also convenience versions of these functions that take std::strings and leveldb::Slices.

Test Plan: make check

Reviewers: sheki, vamsi, dhruba, emayanke

Reviewed By: vamsi

CC: leveldb

Differential Revision: https://reviews.facebook.net/D7071
This commit is contained in:
Kosie van der Merwe 2012-12-07 10:42:19 -08:00
parent c847a31727
commit 0eb0c9bb82
3 changed files with 219 additions and 0 deletions

View File

@ -4,6 +4,8 @@
#include "util/coding.h" #include "util/coding.h"
#include <algorithm>
namespace leveldb { namespace leveldb {
void EncodeFixed32(char* buf, uint32_t value) { void EncodeFixed32(char* buf, uint32_t value) {
@ -191,4 +193,102 @@ bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
} }
} }
// Stores the low `bits` bits of `value` into the bit stream `dst`, starting
// at bit position `offset` (bit 0 is the least significant bit of dst[0]).
// Bits of dst outside [offset, offset + bits) keep their previous contents.
// In debug builds the written field is read back and verified.
void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
                     uint32_t bits, uint64_t value) {
  assert((offset + bits + 7)/8 <= dstlen);
  assert(bits <= 64);

  unsigned char* const out = reinterpret_cast<unsigned char*>(dst);

  // Only needed by the read-back assertion below; guarded so that release
  // builds do not warn about unused variables.
#ifndef NDEBUG
  const uint64_t expected =
      (bits < 64) ? (value & ((uint64_t(1) << bits) - 1)) : value;
  const uint32_t totalBits = bits;
#endif

  // The first byte may be entered mid-way; every later byte starts at bit 0.
  size_t startBit = offset % 8;
  for (size_t idx = offset / 8; bits > 0; ++idx) {
    const size_t chunk = std::min<size_t>(bits, 8 - startBit);
    const unsigned int lowMask = (1u << chunk) - 1;
    const unsigned int fieldMask = lowMask << startBit;

    // Clear the destination field, then drop in the next `chunk` bits.
    const unsigned int kept = out[idx] & ~fieldMask;
    const unsigned int inserted =
        (static_cast<unsigned int>(value) & lowMask) << startBit;
    out[idx] = static_cast<unsigned char>(kept | inserted);

    value >>= chunk;
    bits -= chunk;
    startBit = 0;
  }

  assert(expected == BitStreamGetInt(dst, dstlen, offset, totalBits));
}
// Extracts a `bits`-wide unsigned integer from the bit stream `src`, whose
// least significant bit sits at bit position `offset` (bit 0 is the least
// significant bit of src[0]). The field may span several bytes.
uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
                         uint32_t bits) {
  assert((offset + bits + 7)/8 <= srclen);
  assert(bits <= 64);

  const unsigned char* const in = reinterpret_cast<const unsigned char*>(src);

  uint64_t acc = 0;
  size_t produced = 0;           // Number of result bits assembled so far.
  size_t startBit = offset % 8;  // Non-zero only for the first byte.
  for (size_t idx = offset / 8; bits > 0; ++idx) {
    const size_t chunk = std::min<size_t>(bits, 8 - startBit);
    const unsigned int lowMask = (1u << chunk) - 1;
    const uint64_t piece = (in[idx] >> startBit) & lowMask;

    // Each piece lands above the bits gathered from earlier bytes.
    acc |= piece << produced;

    produced += chunk;
    bits -= chunk;
    startBit = 0;
  }
  return acc;
}
// Convenience overload: writes the low `bits` bits of `value` into the
// string `dst` at bit position `offset`. The string is never resized, so it
// must already be large enough to hold the field.
// REQUIRES: (offset+bits+7)/8 <= dst->size()
// REQUIRES: bits <= 64
void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
                     uint64_t value) {
  assert((offset + bits + 7)/8 <= dst->size());
  // Checked here as well, because the scratch buffer below is sized for at
  // most 64 bits plus a 7-bit misalignment.
  assert(bits <= 64);

  const size_t kTmpBufLen = sizeof(value) + 1;
  char tmpBuf[kTmpBufLen];

  const size_t firstByte = offset / 8;
  const size_t bitInByte = offset % 8;

  // Number of bytes of dst that contain at least one bit of the field.
  // Rounding up (instead of rounding down and looping with <=) avoids
  // touching an extra byte when the field ends exactly on a byte boundary;
  // for a field ending at the last byte that extra byte would be
  // (*dst)[dst->size()].
  const size_t kUsedBytes = (bitInByte + bits + 7) / 8;

  // Copy relevant parts of dst to tmpBuf
  for (size_t idx = 0; idx < kUsedBytes; ++idx) {
    tmpBuf[idx] = (*dst)[firstByte + idx];
  }

  BitStreamPutInt(tmpBuf, kTmpBufLen, bitInByte, bits, value);

  // Copy tmpBuf back to dst
  for (size_t idx = 0; idx < kUsedBytes; ++idx) {
    (*dst)[firstByte + idx] = tmpBuf[idx];
  }

  // Do the check here too as we are working with a buffer.
  assert(((bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value) ==
         BitStreamGetInt(dst, offset, bits));
}
uint64_t BitStreamGetInt(const std::string* src, size_t offset,
uint32_t bits) {
return BitStreamGetInt(src->data(), src->size(), offset, bits);
}
uint64_t BitStreamGetInt(const Slice* src, size_t offset,
uint32_t bits) {
return BitStreamGetInt(src->data(), src->size(), offset, bits);
}
} // namespace leveldb } // namespace leveldb

View File

@ -99,6 +99,32 @@ inline const char* GetVarint32Ptr(const char* p,
return GetVarint32PtrFallback(p, limit, value); return GetVarint32PtrFallback(p, limit, value);
} }
// Writes an unsigned integer with bits number of bits with its least
// significant bit at offset.
// Bits are numbered from 0 to 7 in the first byte, 8 to 15 in the second and
// so on; within a byte, bit 0 is the least significant bit.
// value is truncated to the bits number of least significant bits.
// Bits of dst outside [offset, offset+bits) are left unchanged.
// The requirements below are checked with assert(), i.e. only when NDEBUG is
// not defined.
// REQUIRES: (offset+bits+7)/8 <= dstlen
// REQUIRES: bits <= 64
extern void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
uint32_t bits, uint64_t value);
// Reads an unsigned integer with bits number of bits with its least
// significant bit at offset and returns it.
// Bits are numbered in the same way as BitStreamPutInt().
// The requirements below are checked with assert(), i.e. only when NDEBUG is
// not defined.
// REQUIRES: (offset+bits+7)/8 <= srclen
// REQUIRES: bits <= 64
extern uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
uint32_t bits);
// Convenience functions
// These operate on the bytes of a std::string or Slice instead of a raw
// char buffer; offset, bits and value have the same meaning as above.

// Writes into the existing contents of *dst; the string is not resized.
// REQUIRES: (offset+bits+7)/8 <= dst->size()
extern void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
uint64_t value);
// Reads from src->data(), using src->size() as the stream length.
extern uint64_t BitStreamGetInt(const std::string* src, size_t offset,
uint32_t bits);
// Reads from src->data(), using src->size() as the stream length.
extern uint64_t BitStreamGetInt(const Slice* src, size_t offset,
uint32_t bits);
} // namespace leveldb } // namespace leveldb
#endif // STORAGE_LEVELDB_UTIL_CODING_H_ #endif // STORAGE_LEVELDB_UTIL_CODING_H_

View File

@ -189,6 +189,99 @@ TEST(Coding, Strings) {
ASSERT_EQ("", input.ToString()); ASSERT_EQ("", input.ToString());
} }
// Exercises the raw-buffer BitStreamPutInt()/BitStreamGetInt() pair with
// byte-aligned fields, unaligned fields, several fields packed into one byte
// and a full 64-bit value.
TEST(Coding, BitStream) {
  const int kNumBytes = 10;

  // The stream itself is kNumBytes long; bytes[kNumBytes] is an extra guard
  // byte that no write is allowed to touch.
  char bytes[kNumBytes+1];
  for (int i = 0; i < kNumBytes + 1; ++i) {
    bytes[i] = '\0';
  }

  // Simple byte aligned test.
  for (int i = 0; i < kNumBytes; ++i) {
    BitStreamPutInt(bytes, kNumBytes, i*8, 8, 255-i);

    ASSERT_EQ((unsigned char)bytes[i], (unsigned char)(255-i));
  }
  for (int i = 0; i < kNumBytes; ++i) {
    ASSERT_EQ(BitStreamGetInt(bytes, kNumBytes, i*8, 8), (uint32_t)(255-i));
  }
  // The guard byte is the last element of the array, at index kNumBytes.
  ASSERT_EQ(bytes[kNumBytes], '\0');

  // Write and read back at strange offsets
  for (int i = 0; i < kNumBytes + 1; ++i) {
    bytes[i] = '\0';
  }
  for (int i = 0; i < kNumBytes; ++i) {
    BitStreamPutInt(bytes, kNumBytes, i*5+1, 4, (i * 7) % (1 << 4));
  }
  for (int i = 0; i < kNumBytes; ++i) {
    ASSERT_EQ(BitStreamGetInt(bytes, kNumBytes, i*5+1, 4),
              (uint32_t)((i * 7) % (1 << 4)));
  }
  ASSERT_EQ(bytes[kNumBytes], '\0');

  // Create 11011011 as a bit pattern
  for (int i = 0; i < kNumBytes + 1; ++i) {
    bytes[i] = '\0';
  }
  for (int i = 0; i < kNumBytes; ++i) {
    BitStreamPutInt(bytes, kNumBytes, i*8, 2, 3);
    BitStreamPutInt(bytes, kNumBytes, i*8+3, 2, 3);
    BitStreamPutInt(bytes, kNumBytes, i*8+6, 2, 3);

    ASSERT_EQ((unsigned char)bytes[i],
              (unsigned char)(3 + (3 << 3) + (3 << 6)));
  }
  ASSERT_EQ(bytes[kNumBytes], '\0');

  // Test large values
  for (int i = 0; i < kNumBytes + 1; ++i) {
    bytes[i] = '\0';
  }
  BitStreamPutInt(bytes, kNumBytes, 0, 64, (uint64_t)(-1));
  for (int i = 0; i < 64/8; ++i) {
    ASSERT_EQ((unsigned char)bytes[i],
              (unsigned char)(255));
  }
  // The first byte after the 64-bit field must not have been modified.
  ASSERT_EQ(bytes[64/8], '\0');
}
// Exercises the std::string and Slice overloads of the bit stream helpers:
// independent fields within one byte, a field straddling a byte boundary,
// and a full 64-bit value.
TEST(Coding, BitStreamConvenienceFuncs) {
  // Bit offsets of three 2-bit fields packed into a single byte.
  static const size_t kFieldOffsets[] = {0, 3, 6};
  const size_t kNumFields = sizeof(kFieldOffsets) / sizeof(kFieldOffsets[0]);

  // Check that independent changes to byte are preserved.
  std::string bytes(1, '\0');
  for (size_t f = 0; f < kNumFields; ++f) {
    BitStreamPutInt(&bytes, kFieldOffsets[f], 2, 3);
  }
  ASSERT_EQ((unsigned char)bytes[0], (unsigned char)(3 + (3 << 3) + (3 << 6)));
  for (size_t f = 0; f < kNumFields; ++f) {
    ASSERT_EQ(BitStreamGetInt(&bytes, kFieldOffsets[f], 2), 3u);
  }

  Slice slice(bytes);
  for (size_t f = 0; f < kNumFields; ++f) {
    ASSERT_EQ(BitStreamGetInt(&slice, kFieldOffsets[f], 2), 3u);
  }

  // Test overlapping crossing over byte boundaries
  bytes = std::string(2, '\0');
  BitStreamPutInt(&bytes, 6, 4, 15);
  ASSERT_EQ((unsigned char)bytes[0], 3 << 6);
  ASSERT_EQ((unsigned char)bytes[1], 3);
  ASSERT_EQ(BitStreamGetInt(&bytes, 6, 4), 15u);
  slice = Slice(bytes);
  ASSERT_EQ(BitStreamGetInt(&slice, 6, 4), 15u);

  // Test 64-bit number
  const uint64_t kAllOnes = (uint64_t)(-1);
  bytes = std::string(64/8, '\0');
  BitStreamPutInt(&bytes, 0, 64, kAllOnes);
  ASSERT_EQ(BitStreamGetInt(&bytes, 0, 64), kAllOnes);
  slice = Slice(bytes);
  ASSERT_EQ(BitStreamGetInt(&slice, 0, 64), kAllOnes);
}
} // namespace leveldb } // namespace leveldb
int main(int argc, char** argv) { int main(int argc, char** argv) {