Optimize get_unicode_simple_category with jump table.
This commit is contained in:
parent
96cca84a60
commit
5c9d306208
@ -145,6 +145,66 @@ static const uint32 unicode_simple_category_ranges[] = {
|
||||
4194305, 5561344, 5562369, 5695264, 5695489, 5702592, 5702657, 5887040, 5887489, 6126624, 6225921, 6243264,
|
||||
6291457, 6449504, 4294967295};
|
||||
|
||||
// Jump table used by get_unicode_simple_category: the entry at index (code >> 10) and the entry
// right after it are the begin and end indices of the slice of unicode_simple_category_ranges
// that is searched for that code point.
static const uint16 unicode_simple_category_jump_pos[] = {
|
||||
1, 55, 102, 250, 368, 436, 467, 516, 578, 625, 631, 632, 670, 710, 710, 710, 710, 710, 710,
|
||||
710, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
|
||||
712, 712, 712, 712, 741, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 834, 834, 834,
|
||||
834, 834, 834, 834, 834, 834, 859, 887, 931, 971, 1042, 1073, 1148, 1187, 1231, 1268, 1270, 1273, 1273,
|
||||
1276, 1276, 1277, 1277, 1277, 1278, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1280, 1303, 1317, 1317, 1317,
|
||||
1317, 1317, 1317, 1319, 1319, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1329, 1336, 1336, 1337, 1344, 1344,
|
||||
1344, 1344, 1344, 1349, 1410, 1410, 1412, 1426, 1435, 1444, 1520, 1522, 1522, 1524, 1525, 1525, 1525, 1525, 1525,
|
||||
1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525,
|
||||
1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1527,
|
||||
1527, 1527, 1527, 1529, 1531, 1531, 1531, 1531, 1531, 1533, 1533, 1533, 1533, 1533, 1533, 1533, 1534, 1534, 1534,
|
||||
1535, 1536, 1537, 1537, 1537, 1537, 1537, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
|
||||
1538, 1538, 1538, 1538, 1538, 1538};
|
||||
|
||||
// Number of elements in prepare_search_character_table.
static constexpr uint32 TABLE_SIZE = 1280;
|
||||
|
||||
static const int16 prepare_search_character_table[TABLE_SIZE] = {
|
||||
@ -1186,7 +1246,10 @@ static const int32 without_diacritics_ranges[] = {
|
||||
918000, -918001, 2147483647, 0};
|
||||
|
||||
// Returns the simple category of the Unicode code point `code`.
//
// Entries of unicode_simple_category_ranges are compared against the key (code << 5) + 30 and
// carry the category in their low 5 bits. Instead of searching the whole table, the search is
// limited to the slice [jump_pos[0], jump_pos[1]) that unicode_simple_category_jump_pos assigns
// to the 1024-code-point block containing `code`.
UnicodeSimpleCategory get_unicode_simple_category(uint32 code) {
  const auto block = code >> 10;
  if (block + 1 >= sizeof(unicode_simple_category_jump_pos) / sizeof(unicode_simple_category_jump_pos[0])) {
    // The jump table has no slice for this block, so indexing it would read past the end of the
    // array. Report category value 0: the final unicode_simple_category_ranges entry before the
    // 4294967295 terminator has 0 in its low 5 bits.
    // NOTE(review): assumes value 0 is the "unknown" category - confirm against the enum.
    return static_cast<UnicodeSimpleCategory>(0);
  }
  const auto *jump_pos = &unicode_simple_category_jump_pos[block];
  // CHECK(jump_pos[1] < sizeof(unicode_simple_category_ranges) / sizeof(unicode_simple_category_ranges[0]));
  // upper_bound yields the first entry greater than the key, so the entry just before it is the
  // one whose category applies to `code`; for an empty slice that is the entry preceding the slice.
  const auto it = std::upper_bound(&unicode_simple_category_ranges[jump_pos[0]],
                                   &unicode_simple_category_ranges[jump_pos[1]], (code << 5) + 30);
  return static_cast<UnicodeSimpleCategory>(*(it - 1) & 31);
}
|
||||
|
@ -623,6 +623,16 @@ TEST(Misc, unicode) {
|
||||
test_unicode(td::remove_diacritics);
|
||||
}
|
||||
|
||||
// Calls td::get_unicode_simple_category for every code point in [0, 0x10ffff], 100 times over,
// folding each returned category into a rolling checksum. The checksum is logged, not asserted.
TEST(Misc, get_unicode_simple_category) {
  const td::uint32 last_code_point = 0x10ffff;
  const size_t round_count = 100;

  td::uint32 checksum = 0;
  for (size_t round = 0; round < round_count; round++) {
    for (td::uint32 code = 0; code <= last_code_point; code++) {
      const auto category = td::get_unicode_simple_category(code);
      checksum *= 123;
      checksum += static_cast<td::uint32>(static_cast<int>(category));
    }
  }
  LOG(INFO) << checksum;
}
|
||||
|
||||
TEST(BigNum, from_decimal) {
|
||||
ASSERT_TRUE(td::BigNum::from_decimal("").is_error());
|
||||
ASSERT_TRUE(td::BigNum::from_decimal("a").is_error());
|
||||
|
Loading…
x
Reference in New Issue
Block a user