Use variable step in unicode_simple_category_jump_pos.

This commit is contained in:
levlam 2022-08-19 18:56:36 +03:00
parent 231c47e237
commit 88b52f1535
2 changed files with 69 additions and 59 deletions

View File

@ -143,64 +143,61 @@ static const uint32 unicode_simple_category_ranges[] = {
6291457, 6449504, 4294967295};
static const uint16 unicode_simple_category_jump_pos[] = {
1, 55, 102, 250, 368, 436, 467, 516, 578, 625, 631, 632, 670, 710, 710, 710, 710, 710, 710,
710, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 741, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 834, 834, 834,
834, 834, 834, 834, 834, 834, 859, 887, 931, 971, 1042, 1073, 1148, 1187, 1231, 1268, 1270, 1273, 1273,
1276, 1276, 1277, 1277, 1277, 1278, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1280, 1303, 1317, 1317, 1317,
1317, 1317, 1317, 1319, 1319, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1329, 1336, 1336, 1337, 1344, 1344,
1344, 1344, 1344, 1349, 1410, 1410, 1412, 1426, 1435, 1444, 1520, 1522, 1522, 1524, 1525, 1525, 1525, 1525, 1525,
1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525,
1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1527,
1527, 1527, 1527, 1529, 1531, 1531, 1531, 1531, 1531, 1533, 1533, 1533, 1533, 1533, 1533, 1533, 1534, 1534, 1534,
1535, 1536, 1537, 1537, 1537, 1537, 1537, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538,
1538, 1538, 1538, 1538, 1538, 1538};
1, 9, 27, 27, 27, 27, 36, 44, 55, 55, 57, 63, 68, 75, 86, 91, 102, 114, 119,
130, 158, 180, 202, 225, 250, 271, 292, 312, 324, 332, 357, 365, 368, 383, 397, 397, 397, 407,
423, 431, 436, 437, 437, 437, 437, 440, 448, 458, 467, 472, 480, 487, 494, 498, 503, 509, 516,
524, 538, 538, 540, 540, 540, 558, 578, 592, 595, 622, 625, 625, 625, 625, 625, 626, 629, 629,
629, 629, 629, 630, 631, 631, 631, 631, 631, 631, 631, 631, 632, 632, 640, 650, 667, 669, 669,
669, 670, 682, 689, 692, 699, 706, 709, 709, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710,
710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710,
710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710,
710, 710, 710, 710, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712, 712,
712, 712, 712, 712, 712, 712, 712, 716, 716, 716, 724, 728, 731, 741, 752, 763, 769, 781, 793,
810, 825, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829,
829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829,
829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829,
829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829,
829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 829, 834, 834, 834, 834, 834,
834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834,
834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834,
834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834,
834, 834, 834, 834, 835, 835, 835, 837, 839, 857, 859, 859, 859, 861, 866, 869, 870, 877, 887,
899, 900, 904, 906, 907, 913, 923, 931, 931, 939, 945, 959, 959, 959, 965, 971, 987, 996, 1001,
1008, 1021, 1030, 1038, 1042, 1044, 1049, 1052, 1052, 1055, 1059, 1067, 1073, 1082, 1088, 1100, 1112, 1116, 1129,
1147, 1148, 1156, 1163, 1164, 1168, 1174, 1180, 1186, 1187, 1188, 1193, 1208, 1217, 1225, 1230, 1230, 1231, 1240,
1242, 1256, 1261, 1261, 1263, 1263, 1268, 1268, 1268, 1268, 1268, 1268, 1268, 1268, 1270, 1272, 1272, 1273, 1273,
1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273, 1273,
1276, 1276, 1276, 1276, 1276, 1276, 1276, 1276, 1276, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277,
1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277, 1277,
1277, 1277, 1278, 1278, 1278, 1278, 1278, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279,
1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279,
1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279,
1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1279, 1280, 1280,
1280, 1280, 1280, 1286, 1292, 1302, 1303, 1303, 1303, 1303, 1303, 1305, 1307, 1310, 1317, 1317, 1317, 1317, 1317,
1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317,
1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317, 1317,
1317, 1317, 1317, 1317, 1317, 1319, 1319, 1319, 1319, 1319, 1319, 1319, 1319, 1319, 1319, 1321, 1322, 1322, 1322,
1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322,
1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322,
1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322,
1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1322, 1329, 1329, 1329, 1335, 1335, 1335, 1336, 1336, 1336, 1336,
1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1336, 1337, 1341, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1346, 1348, 1349, 1351, 1367, 1385, 1385, 1385, 1393, 1401, 1410, 1410, 1410, 1410, 1410, 1410,
1410, 1410, 1410, 1410, 1410, 1410, 1410, 1410, 1411, 1412, 1412, 1412, 1413, 1420, 1420, 1420, 1426, 1426, 1426,
1426, 1426, 1426, 1426, 1426, 1426, 1426, 1435, 1435, 1439, 1444, 1444, 1444, 1444, 1444, 1444, 1445, 1450, 1454,
1455, 1511, 1520, 1520, 1520, 1520, 1521, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1524, 1524, 1524, 1524, 1524, 1524, 1524, 1524, 1525, 1537,
1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538, 1538};
static constexpr uint32 TABLE_SIZE = 1280;
@ -1243,8 +1240,11 @@ static const int32 without_diacritics_ranges[] = {
918000, -918001, 2147483647, 0};
UnicodeSimpleCategory get_unicode_simple_category(uint32 code) {
auto it = unicode_simple_category_ranges + unicode_simple_category_jump_pos[code >> 10];
auto jump_pos_index = code <= 0x20000 ? code >> 7 : (0x20000 >> 7) - (0x20000 >> 16) + (code >> 16);
// CHECK(jump_pos_index < sizeof(unicode_simple_category_jump_pos) / sizeof(unicode_simple_category_jump_pos[0]));
auto it = unicode_simple_category_ranges + unicode_simple_category_jump_pos[jump_pos_index];
code = (code << 5) + 30;
// CHECK(unicode_simple_category_ranges[unicode_simple_category_jump_pos[jump_pos_index + 1]] > code);
while (*it <= code) {
++it;
}

View File

@ -633,6 +633,16 @@ TEST(Misc, get_unicode_simple_category) {
LOG(INFO) << result;
}
// Calls td::get_unicode_simple_category for every code point in [0, 0xffff],
// 1000 times over, folding each returned category into a rolling checksum.
// The checksum is only logged, not asserted.
TEST(Misc, get_unicode_simple_category_small) {
  td::uint32 checksum = 0;
  for (size_t round = 0; round < 1000; round++) {
    for (td::uint32 code_point = 0; code_point <= 0xffff; code_point++) {
      // Enum -> int -> uint32, the same conversion chain on every iteration.
      auto category = static_cast<int>(td::get_unicode_simple_category(code_point));
      checksum = checksum * 123 + static_cast<td::uint32>(category);
    }
  }
  LOG(INFO) << checksum;
}
TEST(BigNum, from_decimal) {
ASSERT_TRUE(td::BigNum::from_decimal("").is_error());
ASSERT_TRUE(td::BigNum::from_decimal("a").is_error());