Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1d7e54f

Browse files
authored
Merge pull request simdjson#1089 from simdjson/jkeiser/stage2misc
Special case empty objects and arrays
2 parents 77c8581 + 9c33093 commit 1d7e54f

File tree

2 files changed

+105
-79
lines changed

2 files changed

+105
-79
lines changed

‎src/generic/stage2/structural_iterator.h‎

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,8 @@ class structural_iterator {
3434
return parser.len - *current_structural;
3535
}
3636

37-
really_inline bool past_end(uint32_t n_structural_indexes) {
38-
return current_structural >= &parser.structural_indexes[n_structural_indexes];
39-
}
40-
really_inline bool at_end(uint32_t n_structural_indexes) {
41-
return current_structural == &parser.structural_indexes[n_structural_indexes];
37+
really_inline bool at_end() {
38+
return current_structural == &parser.structural_indexes[parser.n_structural_indexes];
4239
}
4340
really_inline bool at_beginning() {
4441
return current_structural == parser.structural_indexes.get();

‎src/generic/stage2/structural_parser.h‎

Lines changed: 103 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -27,44 +27,49 @@ struct structural_parser : structural_iterator {
2727
current_string_buf_loc{parser.doc->string_buf.get()} {
2828
}
2929

30-
WARN_UNUSED really_inline error_code start_scope(bool parent_is_array) {
30+
WARN_UNUSED really_inline error_code start_scope(bool is_array) {
31+
depth++;
32+
if (depth >= parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
3133
parser.containing_scope[depth].tape_index = next_tape_index();
3234
parser.containing_scope[depth].count = 0;
3335
tape.skip(); // We don't actually *write* the start element until the end.
34-
parser.is_array[depth] = parent_is_array;
35-
depth++;
36-
if (depth >= parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
36+
parser.is_array[depth] = is_array;
3737
return SUCCESS;
3838
}
3939

4040
WARN_UNUSED really_inline error_code start_document() {
4141
log_start_value("document");
42-
return start_scope(false);
42+
parser.containing_scope[depth].tape_index = next_tape_index();
43+
parser.containing_scope[depth].count = 0;
44+
tape.skip(); // We don't actually *write* the start element until the end.
45+
parser.is_array[depth] = false;
46+
if (depth >= parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
47+
return SUCCESS;
4348
}
4449

45-
WARN_UNUSED really_inline error_code start_object(bool parent_is_array) {
50+
WARN_UNUSED really_inline error_code start_object() {
4651
log_start_value("object");
47-
return start_scope(parent_is_array);
52+
return start_scope(false);
4853
}
4954

50-
WARN_UNUSED really_inline error_code start_array(bool parent_is_array) {
55+
WARN_UNUSED really_inline error_code start_array() {
5156
log_start_value("array");
52-
return start_scope(parent_is_array);
57+
return start_scope(true);
5358
}
5459

5560
// this function is responsible for annotating the start of the scope
5661
really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept {
57-
depth--;
58-
// write our doc->tape location to the header scope
59-
// The root scope gets written *at* the previous location.
60-
tape.append(parser.containing_scope[depth].tape_index, end);
62+
// SIMDJSON_ASSUME(depth > 0);
63+
// Write the ending tape element, pointing at the start location
64+
const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
65+
tape.append(start_tape_index, end);
66+
// Write the start tape element, pointing at the end location (and including count)
6167
// count can overflow if it exceeds 24 bits... so we saturate
6268
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
63-
const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
6469
const uint32_t count = parser.containing_scope[depth].count;
6570
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
66-
// This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
6771
tape_writer::write(parser.doc->tape[start_tape_index], next_tape_index() | (uint64_t(cntsat) << 32), start);
72+
depth--;
6873
}
6974

7075
really_inline uint32_t next_tape_index() {
@@ -81,15 +86,38 @@ struct structural_parser : structural_iterator {
8186
}
8287
really_inline void end_document() {
8388
log_end_value("document");
84-
end_scope(internal::tape_type::ROOT, internal::tape_type::ROOT);
89+
constexpr uint32_t start_tape_index = 0;
90+
tape.append(start_tape_index, internal::tape_type::ROOT);
91+
tape_writer::write(parser.doc->tape[start_tape_index], next_tape_index(), internal::tape_type::ROOT);
92+
}
93+
94+
really_inline void empty_container(internal::tape_type start, internal::tape_type end) {
95+
auto start_index = next_tape_index();
96+
tape.append(start_index+2, start);
97+
tape.append(start_index, end);
98+
}
99+
WARN_UNUSED really_inline bool empty_object() {
100+
if (peek_next_char() == '}') {
101+
advance_char();
102+
log_value("empty object");
103+
empty_container(internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
104+
return true;
105+
}
106+
return false;
107+
}
108+
WARN_UNUSED really_inline bool empty_array() {
109+
if (peek_next_char() == ']') {
110+
advance_char();
111+
log_value("empty array");
112+
empty_container(internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
113+
return true;
114+
}
115+
return false;
85116
}
86117

87118
// increment_count increments the count of keys in an object or values in an array.
88-
// Note that if you are at the level of the values or elements, the count
89-
// must be increment in the preceding depth (depth-1) where the array or
90-
// the object resides.
91119
really_inline void increment_count() {
92-
parser.containing_scope[depth - 1].count++; // we have a key value pair in the object at parser.depth - 1
120+
parser.containing_scope[depth].count++; // we have a key value pair in the object at parser.depth - 1
93121
}
94122

95123
really_inline uint8_t *on_start_string() noexcept {
@@ -199,6 +227,16 @@ struct structural_parser : structural_iterator {
199227
return SUCCESS;
200228
}
201229

230+
WARN_UNUSED really_inline error_code start() {
231+
logger::log_start();
232+
233+
// If there are no structurals left, return EMPTY
234+
if (at_end()) { return EMPTY; }
235+
236+
// Push the root scope (there is always at least one scope)
237+
return start_document();
238+
}
239+
202240
WARN_UNUSED really_inline error_code finish() {
203241
end_document();
204242
parser.next_structural_index = uint32_t(current_structural + 1 - &parser.structural_indexes[0]);
@@ -211,29 +249,10 @@ struct structural_parser : structural_iterator {
211249
return SUCCESS;
212250
}
213251

214-
really_inline void init() {
215-
log_start();
216-
}
217-
218-
WARN_UNUSED really_inline error_code start() {
219-
// If there are no structurals left, return EMPTY
220-
if (at_end(parser.n_structural_indexes)) {
221-
return EMPTY;
222-
}
223-
224-
init();
225-
// Push the root scope (there is always at least one scope)
226-
return start_document();
227-
}
228-
229252
really_inline void log_value(const char *type) {
230253
logger::log_line(*this, "", type, "");
231254
}
232255

233-
static really_inline void log_start() {
234-
logger::log_start();
235-
}
236-
237256
really_inline void log_start_value(const char *type) {
238257
logger::log_line(*this, "+", type, "");
239258
if (logger::LOG_ENABLED) { logger::log_depth++; }
@@ -261,11 +280,14 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
261280
// Read first value
262281
//
263282
switch (parser.current_char()) {
264-
case '{':
265-
SIMDJSON_TRY( parser.start_object(false) );
283+
case '{': {
284+
if (parser.empty_object()) { goto document_end; }
285+
SIMDJSON_TRY( parser.start_object() );
266286
goto object_begin;
267-
case '[':
268-
SIMDJSON_TRY( parser.start_array(false) );
287+
}
288+
case '[': {
289+
if (parser.empty_array()) { goto document_end; }
290+
SIMDJSON_TRY( parser.start_array() );
269291
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
270292
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
271293
if (!STREAMING) {
@@ -274,14 +296,15 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
274296
}
275297
}
276298
goto array_begin;
277-
case '"': SIMDJSON_TRY( parser.parse_string() ); goto finish;
278-
case 't': SIMDJSON_TRY( parser.parse_root_true_atom() ); goto finish;
279-
case 'f': SIMDJSON_TRY( parser.parse_root_false_atom() ); goto finish;
280-
case 'n': SIMDJSON_TRY( parser.parse_root_null_atom() ); goto finish;
299+
}
300+
case '"': SIMDJSON_TRY( parser.parse_string() ); goto document_end;
301+
case 't': SIMDJSON_TRY( parser.parse_root_true_atom() ); goto document_end;
302+
case 'f': SIMDJSON_TRY( parser.parse_root_false_atom() ); goto document_end;
303+
case 'n': SIMDJSON_TRY( parser.parse_root_null_atom() ); goto document_end;
281304
case '-':
282305
case '0': case '1': case '2': case '3': case '4':
283306
case '5': case '6': case '7': case '8': case '9':
284-
SIMDJSON_TRY( parser.parse_root_number() ); goto finish;
307+
SIMDJSON_TRY( parser.parse_root_number() ); goto document_end;
285308
default:
286309
parser.log_error("Document starts with a non-value character");
287310
return TAPE_ERROR;
@@ -291,25 +314,27 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
291314
// Object parser states
292315
//
293316
object_begin:
294-
switch (parser.advance_char()) {
295-
case '"': {
296-
parser.increment_count();
297-
SIMDJSON_TRY( parser.parse_string(true) );
298-
goto object_key_state;
299-
}
300-
case '}':
301-
parser.end_object();
302-
goto scope_end;
303-
default:
317+
if (parser.advance_char() != '"') {
304318
parser.log_error("Object does not start with a key");
305319
return TAPE_ERROR;
306320
}
321+
parser.increment_count();
322+
SIMDJSON_TRY( parser.parse_string(true) );
323+
goto object_field;
307324

308-
object_key_state:
325+
object_field:
309326
if (unlikely( parser.advance_char() != ':' )) { parser.log_error("Missing colon after key in object"); return TAPE_ERROR; }
310327
switch (parser.advance_char()) {
311-
case '{': SIMDJSON_TRY( parser.start_object(false) ); goto object_begin;
312-
case '[': SIMDJSON_TRY( parser.start_array(false) ); goto array_begin;
328+
case '{': {
329+
if (parser.empty_object()) { break; };
330+
SIMDJSON_TRY( parser.start_object() );
331+
goto object_begin;
332+
}
333+
case '[': {
334+
if (parser.empty_array()) { break; };
335+
SIMDJSON_TRY( parser.start_array() );
336+
goto array_begin;
337+
}
313338
case '"': SIMDJSON_TRY( parser.parse_string() ); break;
314339
case 't': SIMDJSON_TRY( parser.parse_true_atom() ); break;
315340
case 'f': SIMDJSON_TRY( parser.parse_false_atom() ); break;
@@ -329,7 +354,7 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
329354
parser.increment_count();
330355
if (unlikely( parser.advance_char() != '"' )) { parser.log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; }
331356
SIMDJSON_TRY( parser.parse_string(true) );
332-
goto object_key_state;
357+
goto object_field;
333358
case '}':
334359
parser.end_object();
335360
goto scope_end;
@@ -339,25 +364,28 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
339364
}
340365

341366
scope_end:
342-
if (parser.depth == 1) { goto finish; }
367+
if (parser.depth == 0) { goto document_end; }
343368
if (parser.parser.is_array[parser.depth]) { goto array_continue; }
344369
goto object_continue;
345370

346371
//
347372
// Array parser states
348373
//
349374
array_begin:
350-
if (parser.peek_next_char() == ']') {
351-
parser.advance_char();
352-
parser.end_array();
353-
goto scope_end;
354-
}
355375
parser.increment_count();
356376

357-
main_array_switch:
377+
array_value:
358378
switch (parser.advance_char()) {
359-
case '{': SIMDJSON_TRY( parser.start_object(true) ); goto object_begin;
360-
case '[': SIMDJSON_TRY( parser.start_array(true) ); goto array_begin;
379+
case '{': {
380+
if (parser.empty_object()) { break; };
381+
SIMDJSON_TRY( parser.start_object() );
382+
goto object_begin;
383+
}
384+
case '[': {
385+
if (parser.empty_array()) { break; };
386+
SIMDJSON_TRY( parser.start_array() );
387+
goto array_begin;
388+
}
361389
case '"': SIMDJSON_TRY( parser.parse_string() ); break;
362390
case 't': SIMDJSON_TRY( parser.parse_true_atom() ); break;
363391
case 'f': SIMDJSON_TRY( parser.parse_false_atom() ); break;
@@ -375,7 +403,7 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
375403
switch (parser.advance_char()) {
376404
case ',':
377405
parser.increment_count();
378-
goto main_array_switch;
406+
goto array_value;
379407
case ']':
380408
parser.end_array();
381409
goto scope_end;
@@ -384,9 +412,10 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
384412
return TAPE_ERROR;
385413
}
386414

387-
finish:
415+
document_end:
388416
return parser.finish();
389-
}
417+
418+
} // parse_structurals()
390419

391420
} // namespace stage2
392421
} // namespace SIMDJSON_IMPLEMENTATION

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /