rocksdb/utilities/col_buf_decoder.h
omegaga d51dc96a79 Experiments on column-aware encodings
Summary:
Experiments on column-aware encodings. Supported features: 1) extract data blocks from SST file and encode with specified encodings; 2) Decode encoded data back into row format; 3) Directly extract data blocks and write in row format (without prefix encoding); 4) Get column distribution statistics for column format; 5) Dump data blocks separated by columns in human-readable format.

There is still on-going work on this diff. More refactoring is necessary.

Test Plan: Wrote tests in `column_aware_encoding_test.cc`. More tests should be added.

Reviewers: sdong

Reviewed By: sdong

Subscribers: arahut, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D60027
2016-08-01 14:50:19 -07:00

118 lines
3.6 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#include <endian.h>
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "util/coding.h"
#include "utilities/col_buf_encoder.h"
namespace rocksdb {
struct ColDeclaration;
// Abstract base class for decoders of column buffers. A concrete decoder can
// be created from a ColDeclaration through NewColBufDecoder(). Init() should
// be called before decoding starts; afterwards each call to Decode() handles
// one column value.
class ColBufDecoder {
 public:
  // Pure virtual to keep the base abstract. A pure virtual destructor still
  // requires a definition; it is not provided in this header.
  virtual ~ColBufDecoder() = 0;

  // Preparation hook invoked before decoding. The default implementation
  // does nothing with `src` and returns 0.
  // NOTE(review): overrides presumably return the number of bytes they
  // consumed from `src` -- confirm against the implementations.
  virtual size_t Init(const char* src) { return 0; }

  // Decodes one column value read from `src`, using `dest` as the output
  // cursor.
  // NOTE(review): the meaning of the returned size_t and whether `*dest` is
  // advanced are not visible in this header -- confirm in the .cc file.
  virtual size_t Decode(const char* src, char** dest) = 0;

  // Factory that builds a decoder for `col_declaration`. A raw pointer is
  // returned; KVPairColBufDecoders in this file stores the result in a
  // std::unique_ptr.
  static ColBufDecoder* NewColBufDecoder(const ColDeclaration& col_declaration);

 protected:
  // Scratch storage available to subclasses.
  std::string buffer_;

  // Returns true when `type` is one of the run-length based compression
  // types (kColRle, kColRleVarint, kColRleDeltaVarint, kColRleDict).
  static inline bool IsRunLength(ColCompressionType type) {
    if (type == kColRle || type == kColRleVarint) {
      return true;
    }
    return type == kColRleDeltaVarint || type == kColRleDict;
  }
};
class FixedLengthColBufDecoder : public ColBufDecoder {
public:
explicit FixedLengthColBufDecoder(
size_t size, ColCompressionType col_compression_type = kColNoCompression,
bool nullable = false, bool big_endian = false)
: size_(size),
col_compression_type_(col_compression_type),
nullable_(nullable),
big_endian_(big_endian) {}
size_t Init(const char* src) override;
size_t Decode(const char* src, char** dest) override;
~FixedLengthColBufDecoder() {}
private:
size_t size_;
ColCompressionType col_compression_type_;
bool nullable_;
bool big_endian_;
// for decoding
std::vector<uint64_t> dict_vec_;
uint64_t remain_runs_;
uint64_t run_val_;
uint64_t last_val_;
};
class LongFixedLengthColBufDecoder : public ColBufDecoder {
public:
LongFixedLengthColBufDecoder(size_t size, bool nullable)
: size_(size), nullable_(nullable) {}
size_t Decode(const char* src, char** dest) override;
~LongFixedLengthColBufDecoder() {}
private:
size_t size_;
bool nullable_;
};
class VariableLengthColBufDecoder : public ColBufDecoder {
public:
size_t Decode(const char* src, char** dest) override;
~VariableLengthColBufDecoder() {}
};
// Variable-length decoder that also carries a compression type and
// dictionary containers. It overrides both Init() and Decode().
class VariableChunkColBufDecoder : public VariableLengthColBufDecoder {
 public:
  // Builds a decoder for the given compression type.
  explicit VariableChunkColBufDecoder(ColCompressionType col_compression_type)
      : col_compression_type_(col_compression_type) {}

  // Builds a decoder with kColNoCompression.
  VariableChunkColBufDecoder()
      : VariableChunkColBufDecoder(kColNoCompression) {}

  size_t Init(const char* src) override;
  size_t Decode(const char* src, char** dest) override;

 private:
  ColCompressionType col_compression_type_;
  // NOTE(review): both containers presumably back the dictionary-based
  // compression types; how they are filled is not visible in this header.
  std::unordered_map<uint64_t, uint64_t> dictionary_;
  std::vector<uint64_t> dict_vec_;
};
struct KVPairColBufDecoders {
std::vector<std::unique_ptr<ColBufDecoder>> key_col_bufs;
std::vector<std::unique_ptr<ColBufDecoder>> value_col_bufs;
std::unique_ptr<ColBufDecoder> value_checksum_buf;
explicit KVPairColBufDecoders(const KVPairColDeclarations& kvp_cd) {
for (auto kcd : *kvp_cd.key_col_declarations) {
key_col_bufs.emplace_back(
std::move(ColBufDecoder::NewColBufDecoder(kcd)));
}
for (auto vcd : *kvp_cd.value_col_declarations) {
value_col_bufs.emplace_back(
std::move(ColBufDecoder::NewColBufDecoder(vcd)));
}
value_checksum_buf.reset(
ColBufDecoder::NewColBufDecoder(*kvp_cd.value_checksum_declaration));
}
};
} // namespace rocksdb