From c816837736bfc6c29f2260ea86641ea9dd65b8ac Mon Sep 17 00:00:00 2001 From: levlam Date: Fri, 19 Jan 2018 15:43:00 +0300 Subject: [PATCH] Utils: is_base64 function. GitOrigin-RevId: 6b5d3079d812cb96d080a444e964067a79472d16 --- tdutils/td/utils/base64.cpp | 96 ++++++++++++++++++++++++++----------- tdutils/td/utils/base64.h | 5 +- tdutils/test/misc.cpp | 12 ++++- 3 files changed, 82 insertions(+), 31 deletions(-) diff --git a/tdutils/td/utils/base64.cpp b/tdutils/td/utils/base64.cpp index 8e4c26191..02ee20233 100644 --- a/tdutils/td/utils/base64.cpp +++ b/tdutils/td/utils/base64.cpp @@ -7,8 +7,6 @@ #include "td/utils/base64.h" #include "td/utils/common.h" -#include "td/utils/logging.h" -#include "td/utils/misc.h" #include "td/utils/Slice.h" #include "td/utils/Status.h" @@ -25,7 +23,7 @@ string base64_encode(Slice input) { base64.reserve((input.size() + 2) / 3 * 4); for (size_t i = 0; i < input.size();) { size_t left = std::min(input.size() - i, static_cast(3)); - size_t c = input.ubegin()[i++] << 16; + int c = input.ubegin()[i++] << 16; base64 += symbols64[c >> 18]; if (left != 1) { c |= input.ubegin()[i++] << 8; @@ -49,24 +47,25 @@ string base64_encode(Slice input) { } static unsigned char char_to_value[256]; -static bool init_char_to_value() { - std::fill(std::begin(char_to_value), std::end(char_to_value), 64); - for (unsigned char i = 0; i < 64; i++) { - char_to_value[static_cast(symbols64[i])] = i; - } - return true; +static void init_base64_table() { + static bool is_inited = []() { + std::fill(std::begin(char_to_value), std::end(char_to_value), 64); + for (unsigned char i = 0; i < 64; i++) { + char_to_value[static_cast(symbols64[i])] = i; + } + return true; + }(); } Result base64_decode(Slice base64) { - static bool is_inited = init_char_to_value(); - CHECK(is_inited); + init_base64_table(); if ((base64.size() & 3) != 0) { return Status::Error("Wrong string length"); } size_t padding_length = 0; - while (base64.size() > 0 && base64.back() == '=') { + while (!base64.empty() && base64.back() == '=') { base64.remove_suffix(1); padding_length++; } @@ -78,7 +77,7 @@ Result base64_decode(Slice base64) { output.reserve(((base64.size() + 3) >> 2) * 3); for (size_t i = 0; i < base64.size();) { size_t left = std::min(base64.size() - i, static_cast(4)); - size_t c = 0; + int c = 0; for (size_t t = 0; t < left; t++) { auto value = char_to_value[base64.ubegin()[i++]]; if (value == 64) { @@ -112,7 +111,7 @@ string base64url_encode(Slice input) { base64.reserve((input.size() + 2) / 3 * 4); for (size_t i = 0; i < input.size();) { size_t left = std::min(input.size() - i, static_cast(3)); - size_t c = input.ubegin()[i++] << 16; + int c = input.ubegin()[i++] << 16; base64 += url_symbols64[c >> 18]; if (left != 1) { c |= input.ubegin()[i++] << 8; @@ -132,20 +131,21 @@ string base64url_encode(Slice input) { } static unsigned char url_char_to_value[256]; -static bool init_url_char_to_value() { - std::fill(std::begin(url_char_to_value), std::end(url_char_to_value), 64); - for (unsigned char i = 0; i < 64; i++) { - url_char_to_value[static_cast(url_symbols64[i])] = i; - } - return true; +static void init_base64url_table() { + static bool is_inited = []() { + std::fill(std::begin(url_char_to_value), std::end(url_char_to_value), 64); + for (unsigned char i = 0; i < 64; i++) { + url_char_to_value[static_cast(url_symbols64[i])] = i; + } + return true; + }(); } Result base64url_decode(Slice base64) { - static bool is_inited = init_url_char_to_value(); - CHECK(is_inited); + init_base64url_table(); size_t padding_length = 0; - while (base64.size() > 0 && base64.back() == '=') { + while (!base64.empty() && base64.back() == '=') { base64.remove_suffix(1); padding_length++; } @@ -161,7 +161,7 @@ Result base64url_decode(Slice base64) { output.reserve(((base64.size() + 3) >> 2) * 3); for (size_t i = 0; i < base64.size();) { size_t left = std::min(base64.size() - i, static_cast(4)); - size_t c = 0; + int c = 0; for (size_t t = 0; t < left; t++) { auto value = url_char_to_value[base64.ubegin()[i++]]; if (value == 64) { @@ -188,12 +188,50 @@ Result base64url_decode(Slice base64) { return output; } -string base64_filter(Slice slice) { +bool is_base64(Slice input) { + if ((input.size() & 3) != 0) { + return false; + } + + size_t padding_length = 0; + while (!input.empty() && input.back() == '=') { + input.remove_suffix(1); + padding_length++; + } + if (padding_length >= 3) { + return false; + } + + init_base64_table(); + for (size_t i = 0; i < input.size(); i++) { + if (char_to_value[input.ubegin()[i]] == 64) { + return false; + } + } + + if ((input.size() & 3) == 2) { + auto value = char_to_value[input.back()]; + if ((value & 15) != 0) { + return false; + } + } + if ((input.size() & 3) == 3) { + auto value = char_to_value[input.back()]; + if ((value & 3) != 0) { + return false; + } + } + + return true; +} + +string base64_filter(Slice input) { string res; - res.reserve(slice.size()); - for (auto c : slice) { - if (is_alnum(c) || c == '+' || c == '/' || c == '=') { - res += c; + res.reserve(input.size()); + init_base64_table(); + for (size_t i = 0; i < input.size(); i++) { + if (char_to_value[input.ubegin()[i]] != 64 || input[i] == '=') { + res += input[i]; } } return res; diff --git a/tdutils/td/utils/base64.h b/tdutils/td/utils/base64.h index 17ee8d4f5..d7a98495b 100644 --- a/tdutils/td/utils/base64.h +++ b/tdutils/td/utils/base64.h @@ -18,5 +18,8 @@ Result base64_decode(Slice base64); string base64url_encode(Slice input); Result base64url_decode(Slice base64); -string base64_filter(Slice slice); +bool is_base64(Slice input); + +string base64_filter(Slice input); + } // namespace td diff --git a/tdutils/test/misc.cpp b/tdutils/test/misc.cpp index ede56ab62..a2f542553 100644 --- a/tdutils/test/misc.cpp +++ b/tdutils/test/misc.cpp @@ -115,7 +115,7 @@ TEST(Misc, errno_tls_bug) { } TEST(Misc, base64) { - for (int l = 0; l < 300000; l += l / 20 + 1) { + for (int l = 0; l < 300000; l += l / 20 + l / 1000 * 500 + 1) { for (int t = 0; t < 10; t++) { string s = rand_string(std::numeric_limits::min(), std::numeric_limits::max(), l); string encoded = base64url_encode(s); @@ -139,6 +139,16 @@ TEST(Misc, base64) { ASSERT_TRUE(base64_encode(" /'.;.';≤.];,].',[.;/,.;/]/..;!@#!*(%?::;!%\";") == "ICAgICAgLycuOy4nO+KJpC5dOyxdLicsWy47LywuOy9dLy4uOyFAIyEqKCU/" "Ojo7ISUiOw=="); + ASSERT_TRUE(is_base64("dGVzdA==") == true); + ASSERT_TRUE(is_base64("dGVzdB==") == false); + ASSERT_TRUE(is_base64("dGVzdA=") == false); + ASSERT_TRUE(is_base64("dGVzdA") == false); + ASSERT_TRUE(is_base64("dGVz") == true); + ASSERT_TRUE(is_base64("") == true); + ASSERT_TRUE(is_base64("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") == true); + ASSERT_TRUE(is_base64("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=") == false); + ASSERT_TRUE(is_base64("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-/") == false); + ASSERT_TRUE(is_base64("====") == false); } TEST(Misc, to_integer) {