Better strip_empty_characters.
GitOrigin-RevId: 35863d02683e75da361712647d643866ae4800cf
This commit is contained in:
parent
cfe4d9bdce
commit
a92860a046
@ -138,14 +138,14 @@ vector<tl_object_ptr<td_api::textEntity>> get_text_entities_object(const vector<
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_word_character(uint32 a) {
|
static bool is_word_character(uint32 code) {
|
||||||
switch (get_unicode_simple_category(a)) {
|
switch (get_unicode_simple_category(code)) {
|
||||||
case UnicodeSimpleCategory::Letter:
|
case UnicodeSimpleCategory::Letter:
|
||||||
case UnicodeSimpleCategory::DecimalNumber:
|
case UnicodeSimpleCategory::DecimalNumber:
|
||||||
case UnicodeSimpleCategory::Number:
|
case UnicodeSimpleCategory::Number:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return a == '_';
|
return code == '_';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,16 +159,16 @@ static bool is_word_boundary(uint32 a, uint32 b) {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool is_alpha_digit(uint32 a) {
|
static bool is_alpha_digit(uint32 code) {
|
||||||
return ('0' <= a && a <= '9') || ('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z');
|
return ('0' <= code && code <= '9') || ('a' <= code && code <= 'z') || ('A' <= code && code <= 'Z');
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_alpha_digit_or_underscore(uint32 a) {
|
static bool is_alpha_digit_or_underscore(uint32 code) {
|
||||||
return is_alpha_digit(a) || a == '_';
|
return is_alpha_digit(code) || code == '_';
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_alpha_digit_or_underscore_or_minus(uint32 a) {
|
static bool is_alpha_digit_or_underscore_or_minus(uint32 code) {
|
||||||
return is_alpha_digit_or_underscore(a) || a == '-';
|
return is_alpha_digit_or_underscore(code) || code == '-';
|
||||||
}
|
}
|
||||||
|
|
||||||
// This functions just implements corresponding regexps
|
// This functions just implements corresponding regexps
|
||||||
|
@ -186,6 +186,10 @@ string strip_empty_characters(string str, size_t max_length) {
|
|||||||
return string();
|
return string();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (trimmed[i] == ' ' || trimmed[i] == '\n') {
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (static_cast<unsigned char>(trimmed[i]) == 0xE2 && static_cast<unsigned char>(trimmed[i + 1]) == 0x80 &&
|
if (static_cast<unsigned char>(trimmed[i]) == 0xE2 && static_cast<unsigned char>(trimmed[i + 1]) == 0x80 &&
|
||||||
(static_cast<unsigned char>(trimmed[i + 2]) == 0x8C || static_cast<unsigned char>(trimmed[i + 2]) == 0x8D ||
|
(static_cast<unsigned char>(trimmed[i + 2]) == 0x8C || static_cast<unsigned char>(trimmed[i + 2]) == 0x8D ||
|
||||||
static_cast<unsigned char>(trimmed[i + 2]) == 0xAE)) {
|
static_cast<unsigned char>(trimmed[i + 2]) == 0xAE)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user