Optimize json_string_decode by 30-400%.

This commit is contained in:
levlam 2023-08-08 16:14:04 +03:00
parent 23267aff5d
commit 2bc8ec27df

View File

@ -140,44 +140,26 @@ Result<MutableSlice> json_string_decode(Parser &parser) {
if (!parser.try_skip('"')) { if (!parser.try_skip('"')) {
return Status::Error("Opening '\"' expected"); return Status::Error("Opening '\"' expected");
} }
auto *cur_src = parser.data().data(); auto data = parser.data();
auto *end_src = parser.data().end(); auto *result_start = data.ubegin();
auto *end = cur_src; auto *cur_src = result_start;
while (end < end_src && end[0] != '"') { auto *cur_dest = result_start;
if (end[0] == '\\') { auto *end = data.uend();
end++;
}
end++;
}
if (end >= end_src) {
return Status::Error("Closing '\"' not found");
}
parser.advance(end + 1 - cur_src);
end_src = end;
auto *cur_dest = cur_src; while (true) {
auto *begin_dest = cur_src; if (cur_src == end) {
return Status::Error("Closing '\"' not found");
while (cur_src != end_src) {
auto *slash = static_cast<char *>(std::memchr(cur_src, '\\', end_src - cur_src));
if (slash == nullptr) {
slash = end_src;
} }
std::memmove(cur_dest, cur_src, slash - cur_src); if (*cur_src == '"') {
cur_dest += slash - cur_src; parser.advance(cur_src + 1 - result_start);
cur_src = slash; return data.substr(0, cur_dest - result_start);
if (cur_src != end_src) { }
if (*cur_src == '\\') {
cur_src++; cur_src++;
if (cur_src == end_src) { if (cur_src == end) {
// TODO UNREACHABLE(); return Status::Error("Closing '\"' not found");
return Status::Error("Unexpected end of string");
} }
switch (*cur_src) { switch (*cur_src) {
case '"':
case '\\':
case '/':
*cur_dest++ = *cur_src++;
break;
case 'b': case 'b':
*cur_dest++ = '\b'; *cur_dest++ = '\b';
cur_src++; cur_src++;
@ -200,7 +182,7 @@ Result<MutableSlice> json_string_decode(Parser &parser) {
break; break;
case 'u': { case 'u': {
cur_src++; cur_src++;
if (cur_src + 4 > end_src) { if (cur_src + 4 > end) {
return Status::Error("\\u has less than 4 symbols"); return Status::Error("\\u has less than 4 symbols");
} }
int num = 0; int num = 0;
@ -212,7 +194,7 @@ Result<MutableSlice> json_string_decode(Parser &parser) {
num = num * 16 + d; num = num * 16 + d;
} }
if (0xD7FF < num && num < 0xE000) { if (0xD7FF < num && num < 0xE000) {
if (cur_src + 6 <= end_src && cur_src[0] == '\\' && cur_src[1] == 'u') { if (cur_src + 6 <= end && cur_src[0] == '\\' && cur_src[1] == 'u') {
cur_src += 2; cur_src += 2;
int new_num = 0; int new_num = 0;
for (int i = 0; i < 4; i++, cur_src++) { for (int i = 0; i < 4; i++, cur_src++) {
@ -247,11 +229,16 @@ Result<MutableSlice> json_string_decode(Parser &parser) {
} }
break; break;
} }
default:
*cur_dest++ = *cur_src++;
break;
} }
} else {
*cur_dest++ = *cur_src++;
} }
} }
CHECK(cur_dest <= end_src); UNREACHABLE();
return MutableSlice(begin_dest, cur_dest); return {};
} }
Status json_string_skip(Parser &parser) { Status json_string_skip(Parser &parser) {