d51dc96a79
Summary: Experiments on column-aware encodings. Supported features: 1) extract data blocks from an SST file and encode them with specified encodings; 2) decode encoded data back into row format; 3) directly extract data blocks and write them in row format (without prefix encoding); 4) get column distribution statistics for column format; 5) dump data blocks separated by columns in human-readable format. Work on this diff is still ongoing. More refactoring is necessary. Test Plan: Wrote tests in `column_aware_encoding_test.cc`. More tests should be added. Reviewers: sdong Reviewed By: sdong Subscribers: arahut, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D60027
118 lines
3.6 KiB
C++
118 lines
3.6 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
#pragma once
|
|
#include <endian.h>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
#include "util/coding.h"
|
|
#include "utilities/col_buf_encoder.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
struct ColDeclaration;
|
|
|
|
// ColBufDecoder is a class to decode column buffers. It can be populated from a
|
|
// ColDeclaration. Before starting decoding, a Init() method should be called.
|
|
// Each time it takes a column value into Decode() method.
|
|
class ColBufDecoder {
|
|
public:
|
|
virtual ~ColBufDecoder() = 0;
|
|
virtual size_t Init(const char* src) { return 0; }
|
|
virtual size_t Decode(const char* src, char** dest) = 0;
|
|
static ColBufDecoder* NewColBufDecoder(const ColDeclaration& col_declaration);
|
|
|
|
protected:
|
|
std::string buffer_;
|
|
static inline bool IsRunLength(ColCompressionType type) {
|
|
return type == kColRle || type == kColRleVarint ||
|
|
type == kColRleDeltaVarint || type == kColRleDict;
|
|
}
|
|
};
|
|
|
|
class FixedLengthColBufDecoder : public ColBufDecoder {
|
|
public:
|
|
explicit FixedLengthColBufDecoder(
|
|
size_t size, ColCompressionType col_compression_type = kColNoCompression,
|
|
bool nullable = false, bool big_endian = false)
|
|
: size_(size),
|
|
col_compression_type_(col_compression_type),
|
|
nullable_(nullable),
|
|
big_endian_(big_endian) {}
|
|
|
|
size_t Init(const char* src) override;
|
|
size_t Decode(const char* src, char** dest) override;
|
|
~FixedLengthColBufDecoder() {}
|
|
|
|
private:
|
|
size_t size_;
|
|
ColCompressionType col_compression_type_;
|
|
bool nullable_;
|
|
bool big_endian_;
|
|
|
|
// for decoding
|
|
std::vector<uint64_t> dict_vec_;
|
|
uint64_t remain_runs_;
|
|
uint64_t run_val_;
|
|
uint64_t last_val_;
|
|
};
|
|
|
|
class LongFixedLengthColBufDecoder : public ColBufDecoder {
|
|
public:
|
|
LongFixedLengthColBufDecoder(size_t size, bool nullable)
|
|
: size_(size), nullable_(nullable) {}
|
|
|
|
size_t Decode(const char* src, char** dest) override;
|
|
~LongFixedLengthColBufDecoder() {}
|
|
|
|
private:
|
|
size_t size_;
|
|
bool nullable_;
|
|
};
|
|
|
|
class VariableLengthColBufDecoder : public ColBufDecoder {
|
|
public:
|
|
size_t Decode(const char* src, char** dest) override;
|
|
~VariableLengthColBufDecoder() {}
|
|
};
|
|
|
|
class VariableChunkColBufDecoder : public VariableLengthColBufDecoder {
|
|
public:
|
|
size_t Init(const char* src) override;
|
|
size_t Decode(const char* src, char** dest) override;
|
|
explicit VariableChunkColBufDecoder(ColCompressionType col_compression_type)
|
|
: col_compression_type_(col_compression_type) {}
|
|
VariableChunkColBufDecoder() : col_compression_type_(kColNoCompression) {}
|
|
|
|
private:
|
|
ColCompressionType col_compression_type_;
|
|
std::unordered_map<uint64_t, uint64_t> dictionary_;
|
|
std::vector<uint64_t> dict_vec_;
|
|
};
|
|
|
|
struct KVPairColBufDecoders {
|
|
std::vector<std::unique_ptr<ColBufDecoder>> key_col_bufs;
|
|
std::vector<std::unique_ptr<ColBufDecoder>> value_col_bufs;
|
|
std::unique_ptr<ColBufDecoder> value_checksum_buf;
|
|
|
|
explicit KVPairColBufDecoders(const KVPairColDeclarations& kvp_cd) {
|
|
for (auto kcd : *kvp_cd.key_col_declarations) {
|
|
key_col_bufs.emplace_back(
|
|
std::move(ColBufDecoder::NewColBufDecoder(kcd)));
|
|
}
|
|
for (auto vcd : *kvp_cd.value_col_declarations) {
|
|
value_col_bufs.emplace_back(
|
|
std::move(ColBufDecoder::NewColBufDecoder(vcd)));
|
|
}
|
|
value_checksum_buf.reset(
|
|
ColBufDecoder::NewColBufDecoder(*kvp_cd.value_checksum_declaration));
|
|
}
|
|
};
|
|
} // namespace rocksdb
|