Better strip_empty_characters.

GitOrigin-RevId: 35863d02683e75da361712647d643866ae4800cf
This commit is contained in:
levlam 2018-03-21 17:54:39 +03:00
parent cfe4d9bdce
commit a92860a046
2 changed files with 13 additions and 9 deletions

View File

@ -138,14 +138,14 @@ vector<tl_object_ptr<td_api::textEntity>> get_text_entities_object(const vector<
return result;
}
static bool is_word_character(uint32 a) {
switch (get_unicode_simple_category(a)) {
static bool is_word_character(uint32 code) {
switch (get_unicode_simple_category(code)) {
case UnicodeSimpleCategory::Letter:
case UnicodeSimpleCategory::DecimalNumber:
case UnicodeSimpleCategory::Number:
return true;
default:
return a == '_';
return code == '_';
}
}
@ -159,16 +159,16 @@ static bool is_word_boundary(uint32 a, uint32 b) {
}
*/
static bool is_alpha_digit(uint32 a) {
return ('0' <= a && a <= '9') || ('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z');
static bool is_alpha_digit(uint32 code) {
return ('0' <= code && code <= '9') || ('a' <= code && code <= 'z') || ('A' <= code && code <= 'Z');
}
static bool is_alpha_digit_or_underscore(uint32 a) {
return is_alpha_digit(a) || a == '_';
static bool is_alpha_digit_or_underscore(uint32 code) {
return is_alpha_digit(code) || code == '_';
}
static bool is_alpha_digit_or_underscore_or_minus(uint32 a) {
return is_alpha_digit_or_underscore(a) || a == '-';
static bool is_alpha_digit_or_underscore_or_minus(uint32 code) {
return is_alpha_digit_or_underscore(code) || code == '-';
}
// These functions just implement the corresponding regexps

View File

@ -186,6 +186,10 @@ string strip_empty_characters(string str, size_t max_length) {
return string();
}
if (trimmed[i] == ' ' || trimmed[i] == '\n') {
i++;
continue;
}
if (static_cast<unsigned char>(trimmed[i]) == 0xE2 && static_cast<unsigned char>(trimmed[i + 1]) == 0x80 &&
(static_cast<unsigned char>(trimmed[i + 2]) == 0x8C || static_cast<unsigned char>(trimmed[i + 2]) == 0x8D ||
static_cast<unsigned char>(trimmed[i + 2]) == 0xAE)) {