From a92860a0462d083fdd45a905f804133b74cfbb4b Mon Sep 17 00:00:00 2001 From: levlam Date: Wed, 21 Mar 2018 17:54:39 +0300 Subject: [PATCH] Better strip_empty_characters. GitOrigin-RevId: 35863d02683e75da361712647d643866ae4800cf --- td/telegram/MessageEntity.cpp | 18 +++++++++--------- td/telegram/misc.cpp | 4 ++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index 7af90fb0..8830101a 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -138,14 +138,14 @@ vector> get_text_entities_object(const vector< return result; } -static bool is_word_character(uint32 a) { - switch (get_unicode_simple_category(a)) { +static bool is_word_character(uint32 code) { + switch (get_unicode_simple_category(code)) { case UnicodeSimpleCategory::Letter: case UnicodeSimpleCategory::DecimalNumber: case UnicodeSimpleCategory::Number: return true; default: - return a == '_'; + return code == '_'; } } @@ -159,16 +159,16 @@ static bool is_word_boundary(uint32 a, uint32 b) { } */ -static bool is_alpha_digit(uint32 a) { - return ('0' <= a && a <= '9') || ('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z'); +static bool is_alpha_digit(uint32 code) { + return ('0' <= code && code <= '9') || ('a' <= code && code <= 'z') || ('A' <= code && code <= 'Z'); } -static bool is_alpha_digit_or_underscore(uint32 a) { - return is_alpha_digit(a) || a == '_'; +static bool is_alpha_digit_or_underscore(uint32 code) { + return is_alpha_digit(code) || code == '_'; } -static bool is_alpha_digit_or_underscore_or_minus(uint32 a) { - return is_alpha_digit_or_underscore(a) || a == '-'; +static bool is_alpha_digit_or_underscore_or_minus(uint32 code) { + return is_alpha_digit_or_underscore(code) || code == '-'; } // This functions just implements corresponding regexps diff --git a/td/telegram/misc.cpp b/td/telegram/misc.cpp index 28290627..fc72546c 100644 --- a/td/telegram/misc.cpp +++ b/td/telegram/misc.cpp @@ -186,6 +186,10 @@ string strip_empty_characters(string str, size_t max_length) { return string(); } + if (trimmed[i] == ' ' || trimmed[i] == '\n') { + i++; + continue; + } if (static_cast(trimmed[i]) == 0xE2 && static_cast(trimmed[i + 1]) == 0x80 && (static_cast(trimmed[i + 2]) == 0x8C || static_cast(trimmed[i + 2]) == 0x8D || static_cast(trimmed[i + 2]) == 0xAE)) {