Add utf8_prepare_search_string.
This commit is contained in:
parent
0c21d0d596
commit
4a6e0aea43
@ -35,36 +35,7 @@ vector<string> Hints::fix_words(vector<string> words) {
|
||||
}
|
||||
|
||||
/**
 * Splits a name into normalized search words and post-processes them.
 *
 * The word splitting itself (per-character preparation, splitting on spaces,
 * diacritics removal) is performed by utf8_get_search_words(), which carries
 * exactly the loop that used to be duplicated here; the resulting list is then
 * handed to fix_words().
 *
 * @param name UTF-8 text to split.
 * @return the value produced by fix_words() for the extracted words.
 */
vector<string> Hints::get_words(Slice name) {
  // Delegate to the shared helper instead of keeping a second copy of the
  // same loop (and an unreachable trailing return) in this function.
  return fix_words(utf8_get_search_words(name));
}
|
||||
|
||||
void Hints::add_word(const string &word, KeyT key, std::map<string, vector<KeyT>> &word_to_keys) {
|
||||
|
@ -16,7 +16,7 @@ UnicodeSimpleCategory get_unicode_simple_category(uint32 code);
|
||||
|
||||
/**
|
||||
* Prepares unicode character for search, leaving only digits and lowercased letters.
|
||||
* Return code of replacing character or 0 if the character should be skipped.
|
||||
* Returns code of replacing character or 0 if the character should be skipped.
|
||||
*/
|
||||
uint32 prepare_search_character(uint32 code);
|
||||
|
||||
|
@ -112,6 +112,42 @@ string utf8_to_lower(Slice str) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Splits a UTF-8 string into words suitable for search.
 *
 * Each decoded code point is passed through prepare_search_character():
 * a result of 0 means the character is skipped, a result of ' ' terminates
 * the current word, and any other result is passed through
 * remove_diacritics() and appended to the current word.
 *
 * @param str UTF-8 text; it is decoded with next_utf8_unsafe().
 * @return the words in the order they appear in str.
 */
vector<string> utf8_get_search_words(Slice str) {
  vector<string> result;
  string current;
  bool has_current = false;

  // Emits the word accumulated so far (if any) and resets the accumulator.
  auto flush_current = [&] {
    if (!has_current) {
      return;
    }
    result.push_back(std::move(current));
    current.clear();  // bring the moved-from string back to a known empty state
    has_current = false;
  };

  const auto last = str.uend();
  for (auto it = str.ubegin(); it != last;) {
    uint32 raw_code;
    it = next_utf8_unsafe(it, &raw_code);

    const uint32 prepared = prepare_search_character(raw_code);
    if (prepared == 0) {
      // Character is not relevant for search at all.
      continue;
    }
    if (prepared == ' ') {
      // Word separator: close the current word, if one is open.
      flush_current();
      continue;
    }

    has_current = true;
    const uint32 folded = remove_diacritics(prepared);
    append_utf8_character(current, folded);
  }

  // The input may end in the middle of a word.
  flush_current();
  return result;
}
|
||||
|
||||
/**
 * Builds a single search string from a UTF-8 string.
 *
 * The input is first split into words by utf8_get_search_words(); the words
 * are then joined back together by implode() called with its default
 * arguments.
 *
 * @param str UTF-8 text to prepare.
 * @return the joined search words.
 */
string utf8_prepare_search_string(Slice str) {
  auto search_words = utf8_get_search_words(str);
  return implode(std::move(search_words));
}
|
||||
|
||||
string utf8_encode(CSlice data) {
|
||||
if (check_utf8(data)) {
|
||||
return data.str();
|
||||
|
@ -86,6 +86,12 @@ Slice utf8_utf16_substr(Slice str, size_t offset, size_t length);
|
||||
/// Returns UTF-8 string converted to lower case.
|
||||
string utf8_to_lower(Slice str);
|
||||
|
||||
/// Returns UTF-8 string split into words for search.
|
||||
vector<string> utf8_get_search_words(Slice str);
|
||||
|
||||
/// Returns UTF-8 string prepared for search, leaving only digits and lowercased letters.
|
||||
string utf8_prepare_search_string(Slice str);
|
||||
|
||||
/// Returns valid UTF-8 representation of the string.
|
||||
string utf8_encode(CSlice data);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user