From 21700a5106c7be6f5c8bfe31f8f0cca622ad27ca Mon Sep 17 00:00:00 2001 From: Laurent Demailly Date: Tue, 29 Mar 2016 21:25:12 -0700 Subject: [PATCH] to/from hex refactor Summary: Expose the inverse of ToString(hex=true) on Slice: Slice::DecodeHex Refactor the other implementation of to/from hex in ldb_cmd.h to use the Slice version (Difference between the 2 is whether 0x is expected/produced in front of the hex string or not) Eliminated support for invalid odd length hex string - this is now invalid instead of having 1/2 byte set Added (inverse of HexToString) test for LDBCommand::StringToHex which also indirectly tests Slice::ToString(true) After moving the original implementation from ldb_cmd.h, updated it to much simpler/efficient version (originally/inspired from https://github.com/facebook/wdt/blob/master/util/EncryptionUtils.cpp#L140-L169 ) Test Plan: run tests Reviewers: uddipta, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D56121 --- include/rocksdb/slice.h | 8 ++++++ table/format.cc | 9 +----- tools/db_stress.cc | 6 +--- tools/ldb_cmd.h | 43 +++++++-------------------- tools/ldb_cmd_test.cc | 12 +++++--- util/slice.cc | 64 +++++++++++++++++++++++++++++++++++------ 6 files changed, 84 insertions(+), 58 deletions(-) diff --git a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 3663716dc..38d494ed9 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -79,8 +79,16 @@ class Slice { } // Return a string that contains the copy of the referenced data. + // when hex is true, returns a string of twice the length hex encoded (0-9A-F) std::string ToString(bool hex = false) const; + // Decodes the current slice interpreted as an hexadecimal string into result, + // if successful returns true, if this isn't a valid hex string + // (e.g not coming from Slice::ToString(true)) DecodeHex returns false. + // This slice is expected to have an even number of 0-9A-F characters + // also accepts lowercase (a-f) + bool DecodeHex(std::string* result) const; + // Three-way comparison. Returns value: // < 0 iff "*this" < "b", // == 0 iff "*this" == "b", diff --git a/table/format.cc b/table/format.cc index bb028c99a..b20ecf8cf 100644 --- a/table/format.cc +++ b/table/format.cc @@ -59,14 +59,7 @@ std::string BlockHandle::ToString(bool hex) const { std::string handle_str; EncodeTo(&handle_str); if (hex) { - std::string result; - char buf[10]; - for (size_t i = 0; i < handle_str.size(); i++) { - snprintf(buf, sizeof(buf), "%02X", - static_cast(handle_str[i])); - result += buf; - } - return result; + return Slice(handle_str).ToString(true); } else { return handle_str; } diff --git a/tools/db_stress.cc b/tools/db_stress.cc index f77dc445f..1d13f00af 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -465,11 +465,7 @@ static std::string Key(int64_t val) { static std::string StringToHex(const std::string& str) { std::string result = "0x"; - char buf[10]; - for (size_t i = 0; i < str.length(); i++) { - snprintf(buf, 10, "%02X", (unsigned char)str[i]); - result += buf; - } + result.append(Slice(str).ToString(true)); return result; } diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index cc3814c2f..781fd3169 100644 --- a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -133,50 +133,27 @@ public: exec_state_.Reset(); } + // Consider using Slice::DecodeHex directly instead if you don't need the + // 0x prefix static string HexToString(const string& str) { + string result; std::string::size_type len = str.length(); - string parsed; - static const char* const hexas = "0123456789ABCDEF"; - parsed.reserve(len / 2); - if (len < 2 || str[0] != '0' || str[1] != 'x') { fprintf(stderr, "Invalid hex input %s. Must start with 0x\n", str.c_str()); throw "Invalid hex input"; } - - for (unsigned int i = 2; i < len; i += 2) { - char a = static_cast(toupper(str[i])); - const char* p = std::lower_bound(hexas, hexas + 16, a); - if (*p != a) { - throw "Invalid hex value"; - } - - if (i + 1 >= len) { - // if odd number of chars than we just hit end of string - parsed.push_back(static_cast(p - hexas)); - break; - } - - char b = static_cast(toupper(str[i + 1])); - const char* q = std::lower_bound(hexas, hexas + 16, b); - if (*q == b) { - // pairwise compute decimal value from hex - parsed.push_back(static_cast(((p - hexas) << 4) | (q - hexas))); - } else { - throw "Invalid hex value"; - } + if (!Slice(str.data() + 2, len - 2).DecodeHex(&result)) { + throw "Invalid hex input"; } - return parsed; + return result; } + // Consider using Slice::ToString(true) directly instead if + // you don't need the 0x prefix static string StringToHex(const string& str) { - string result = "0x"; - char buf[10]; - for (size_t i = 0; i < str.length(); i++) { - snprintf(buf, 10, "%02X", (unsigned char)str[i]); - result += buf; - } + string result("0x"); + result.append(Slice(str).ToString(true)); return result; } diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index 892f5843c..4df241aa7 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -6,29 +6,33 @@ #ifndef ROCKSDB_LITE #include "tools/ldb_cmd.h" +#include #include "util/testharness.h" class LdbCmdTest : public testing::Test {}; TEST_F(LdbCmdTest, HexToString) { // map input to expected outputs. + // odd number of "hex" half bytes doesn't make sense map> inputMap = { - {"0x7", {7}}, {"0x5050", {80, 80}}, {"0xFF", {-1}}, - {"0x1234", {18, 52}}, {"0xaa", {-86}}, {"0x123", {18, 3}}, + {"0x07", {7}}, {"0x5050", {80, 80}}, {"0xFF", {-1}}, + {"0x1234", {18, 52}}, {"0xaaAbAC", {-86, -85, -84}}, {"0x1203", {18, 3}}, }; for (const auto& inPair : inputMap) { auto actual = rocksdb::LDBCommand::HexToString(inPair.first); auto expected = inPair.second; for (unsigned int i = 0; i < actual.length(); i++) { - ASSERT_EQ(expected[i], static_cast(actual[i])); + EXPECT_EQ(expected[i], static_cast(actual[i])); } + auto reverse = rocksdb::LDBCommand::StringToHex(actual); + EXPECT_EQ(strcasecmp(inPair.first.c_str(), reverse.c_str()), 0); } } TEST_F(LdbCmdTest, HexToStringBadInputs) { const vector badInputs = { - "0xZZ", "123", "0xx5", "0x11G", "Ox12", "0xT", "0x1Q1", + "0xZZ", "123", "0xx5", "0x111G", "0x123", "Ox12", "0xT", "0x1Q1", }; for (const auto badInput : badInputs) { try { diff --git a/util/slice.cc b/util/slice.cc index d1ddb7cd7..22a7d7dd7 100644 --- a/util/slice.cc +++ b/util/slice.cc @@ -104,19 +104,40 @@ class NoopTransform : public SliceTransform { } -// Do not want to include the whole /port/port.h here for one define -#ifdef OS_WIN -#define snprintf _snprintf -#endif +// 2 small internal utility functions, for efficient hex conversions +// and no need for snprintf, toupper etc... +// Originally from wdt/util/EncryptionUtils.cpp - for ToString(true)/DecodeHex: +char toHex(unsigned char v) { + if (v <= 9) { + return '0' + v; + } + return 'A' + v - 10; +} +// most of the code is for validation/error check +int fromHex(char c) { + // toupper: + if (c >= 'a' && c <= 'f') { + c -= ('a' - 'A'); // aka 0x20 + } + // validation + if (c < '0' || (c > '9' && (c < 'A' || c > 'F'))) { + return -1; // invalid not 0-9A-F hex char + } + if (c <= '9') { + return c - '0'; + } + return c - 'A' + 10; +} // Return a string that contains the copy of the referenced data. std::string Slice::ToString(bool hex) const { std::string result; // RVO/NRVO/move if (hex) { - char buf[10]; - for (size_t i = 0; i < size_; i++) { - snprintf(buf, 10, "%02X", (unsigned char)data_[i]); - result += buf; + result.reserve(2 * size_); + for (size_t i = 0; i < size_; ++i) { + unsigned char c = data_[i]; + result.push_back(toHex(c >> 4)); + result.push_back(toHex(c & 0xf)); } return result; } else { @@ -125,6 +146,33 @@ std::string Slice::ToString(bool hex) const { } } +// Originally from tools/ldb_cmd.h +bool Slice::DecodeHex(std::string* result) const { + std::string::size_type len = size_; + if (len % 2) { + // Hex string must be even number of hex digits to get complete bytes back + return false; + } + if (!result) { + return false; + } + result->clear(); + result->reserve(len / 2); + + for (size_t i = 0; i < len;) { + int h1 = fromHex(data_[i++]); + if (h1 < 0) { + return false; + } + int h2 = fromHex(data_[i++]); + if (h2 < 0) { + return false; + } + result->push_back((h1 << 4) | h2); + } + return true; +} + const SliceTransform* NewFixedPrefixTransform(size_t prefix_len) { return new FixedPrefixTransform(prefix_len); }