@@ -27,44 +27,49 @@ struct structural_parser : structural_iterator {
27
27
current_string_buf_loc{parser.doc ->string_buf .get ()} {
28
28
}
29
29
30
// Opens a new container scope.
// (Diff listing: "-" lines are the removed version, "+" lines the added one,
// bare numbers are line-number residue.)
//
// Added version: `depth` is advanced first and checked against
// parser.max_depth(); only then is the per-depth bookkeeping written, so all
// of it lands at the new depth. The removed version wrote the bookkeeping at
// the old depth and incremented afterwards.
//
// @param is_array  stored in parser.is_array[depth] for the scope being opened.
// @return DEPTH_ERROR when the new depth is >= parser.max_depth(), otherwise SUCCESS.
- WARN_UNUSED really_inline error_code start_scope (bool parent_is_array) {
30
+ WARN_UNUSED really_inline error_code start_scope (bool is_array) {
31
// Enter the new scope before touching any per-depth state.
+ depth++;
32
+ if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
31
33
// Remember the tape index where this scope's start element belongs.
parser.containing_scope [depth].tape_index = next_tape_index ();
32
34
// A freshly opened scope has no members yet.
parser.containing_scope [depth].count = 0 ;
33
35
// NOTE(review): tape.skip() presumably advances past one slot without writing it - confirm.
tape.skip (); // We don't actually *write* the start element until the end.
34
- parser.is_array [depth] = parent_is_array;
35
- depth++;
36
- if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
36
+ parser.is_array [depth] = is_array;
37
37
return SUCCESS;
38
38
}
39
39
40
40
// Logs the start of the document and sets up the root scope.
//
// Added version: performs the scope bookkeeping inline at the *current*
// `depth` (no increment), marks the scope as a non-array, and returns
// DEPTH_ERROR if depth >= parser.max_depth(), otherwise SUCCESS.
// The removed version delegated to start_scope(false).
//
// NOTE(review): the depth check runs after the writes to
// containing_scope[depth] / is_array[depth]; presumably depth is 0 here and
// max_depth() > 0, so the writes are in bounds - confirm.
WARN_UNUSED really_inline error_code start_document () {
41
41
log_start_value (" document" );
42
- return start_scope (false );
42
// Record where the root's start element belongs on the tape.
+ parser.containing_scope [depth].tape_index = next_tape_index ();
43
+ parser.containing_scope [depth].count = 0 ;
44
+ tape.skip (); // We don't actually *write* the start element until the end.
45
// The root scope is not an array.
+ parser.is_array [depth] = false ;
46
+ if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
47
+ return SUCCESS;
43
48
}
44
49
45
// Logs the start of an object and opens a scope for it.
//
// Added version: takes no argument and always calls start_scope(false),
// i.e. the opened scope is recorded as "not an array".
// The removed version forwarded a caller-supplied parent_is_array flag.
//
// @return whatever start_scope returns.
- WARN_UNUSED really_inline error_code start_object (bool parent_is_array ) {
50
+ WARN_UNUSED really_inline error_code start_object () {
46
51
log_start_value (" object" );
47
- return start_scope (parent_is_array );
52
+ return start_scope (false );
48
53
}
49
54
50
// Logs the start of an array and opens a scope for it.
//
// Added version: takes no argument and always calls start_scope(true),
// i.e. the opened scope is recorded as an array.
// The removed version forwarded a caller-supplied parent_is_array flag.
//
// @return whatever start_scope returns.
- WARN_UNUSED really_inline error_code start_array (bool parent_is_array ) {
55
+ WARN_UNUSED really_inline error_code start_array () {
51
56
log_start_value (" array" );
52
- return start_scope (parent_is_array );
57
+ return start_scope (true );
53
58
}
54
59
55
60
// this function is responsible for annotating the start of the scope
//
// Closes the scope at the current `depth`:
//   1. appends the `end` element, whose payload is the scope's stored start
//      tape index;
//   2. writes the `start` element at that stored index, with a payload of
//      next_tape_index() OR'ed with the saturated member count shifted left
//      by 32 bits;
//   3. decrements `depth`.
// Added version: `depth--` moved from the first statement to the last, so
// every containing_scope[depth] access refers to the scope being closed.
//
// @param start  tape type written into the scope's start element.
// @param end    tape type appended as the scope's end element.
56
61
really_inline void end_scope (internal::tape_type start, internal::tape_type end) noexcept {
57
- depth--;
58
- // write our doc->tape location to the header scope
59
- // The root scope gets written *at* the previous location.
60
- tape.append (parser.containing_scope [depth].tape_index , end);
62
+ // SIMDJSON_ASSUME(depth > 0);
63
+ // Write the ending tape element, pointing at the start location
64
+ const uint32_t start_tape_index = parser.containing_scope [depth].tape_index ;
65
+ tape.append (start_tape_index, end);
66
+ // Write the start tape element, pointing at the end location (and including count)
61
67
// count can overflow if it exceeds 24 bits... so we saturate
62
68
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
63
- const uint32_t start_tape_index = parser.containing_scope [depth].tape_index ;
64
69
const uint32_t count = parser.containing_scope [depth].count ;
65
70
// Clamp the count to 24 bits (0xFFFFFF) before packing it.
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
66
- // This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
67
71
// Payload: next tape index in the low bits, saturated count starting at bit 32.
tape_writer::write (parser.doc ->tape [start_tape_index], next_tape_index () | (uint64_t (cntsat) << 32 ), start);
72
// Leave the scope only after all per-depth state has been consumed.
+ depth--;
68
73
}
69
74
70
75
really_inline uint32_t next_tape_index () {
@@ -81,15 +86,38 @@ struct structural_parser : structural_iterator {
81
86
}
82
87
// Logs the end of the document and writes both ROOT tape elements.
//
// Added version: no longer goes through end_scope. It uses a fixed start
// tape index of 0, appends the closing ROOT element with that index as
// payload, then writes the ROOT element at tape[0] with next_tape_index()
// as payload. No member count is packed in and `depth` is not modified here.
really_inline void end_document () {
83
88
log_end_value (" document" );
84
- end_scope (internal::tape_type::ROOT, internal::tape_type::ROOT);
89
// The root's start element lives at the very first tape slot.
+ constexpr uint32_t start_tape_index = 0 ;
90
+ tape.append (start_tape_index, internal::tape_type::ROOT);
91
+ tape_writer::write (parser.doc ->tape [start_tape_index], next_tape_index (), internal::tape_type::ROOT);
92
+ }
93
+
94
// Appends the two tape elements of an empty container in one go, without
// opening a scope (no depth or containing_scope access here).
//
// @param start  tape type of the opening element; its payload is start_index+2.
// @param end    tape type of the closing element; its payload is start_index.
//
// NOTE(review): start_index+2 is presumably the index just past this pair,
// assuming each tape.append() advances the tape by one slot - confirm.
// No member count is packed into the start payload.
+ really_inline void empty_container (internal::tape_type start, internal::tape_type end) {
95
// Index the opening element is about to occupy.
+ auto start_index = next_tape_index ();
96
+ tape.append (start_index+2 , start);
97
+ tape.append (start_index, end);
98
+ }
99
// Fast path for an empty object: if the next character closes the object,
// consume it, log "empty object", emit the START_OBJECT/END_OBJECT pair via
// empty_container(), and report true. Otherwise report false.
//
// @return true when an empty object was recognised and written; false otherwise.
//
// NOTE(review): peek_next_char() presumably inspects without advancing, so
// the false path leaves the position untouched - confirm.
// NOTE(review): the literal reads ' }' (with a space) in this listing, which
// would be a multi-character constant; this looks like an extraction artifact
// for '}' - confirm against the real file.
+ WARN_UNUSED really_inline bool empty_object () {
100
+ if (peek_next_char () == ' }' ) {
101
// Consume the closing character that was just peeked.
+ advance_char ();
102
+ log_value (" empty object" );
103
+ empty_container (internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
104
+ return true ;
105
+ }
106
+ return false ;
107
+ }
108
// Fast path for an empty array: if the next character closes the array,
// consume it, log "empty array", emit the START_ARRAY/END_ARRAY pair via
// empty_container(), and report true. Otherwise report false.
//
// @return true when an empty array was recognised and written; false otherwise.
//
// NOTE(review): peek_next_char() presumably inspects without advancing, so
// the false path leaves the position untouched - confirm.
// NOTE(review): the literal reads ' ]' (with a space) in this listing, which
// would be a multi-character constant; this looks like an extraction artifact
// for ']' - confirm against the real file.
+ WARN_UNUSED really_inline bool empty_array () {
109
+ if (peek_next_char () == ' ]' ) {
110
// Consume the closing character that was just peeked.
+ advance_char ();
111
+ log_value (" empty array" );
112
+ empty_container (internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
113
+ return true ;
114
+ }
115
+ return false ;
85
116
}
86
117
87
118
// increment_count increments the count of keys in an object or values in an array.
// Added version: the counter lives in parser.containing_scope[depth], i.e. in
// the entry for the current depth; the removed version used depth - 1.
88
- // Note that if you are at the level of the values or elements, the count
89
- // must be increment in the preceding depth (depth-1) where the array or
90
- // the object resides.
91
119
really_inline void increment_count () {
92
- parser.containing_scope [depth - 1 ].count ++; // we have a key value pair in the object at parser.depth - 1
120
+ parser.containing_scope [depth].count ++; // one more key/value pair (or array element) in the scope at the current depth
93
121
}
94
122
95
123
really_inline uint8_t *on_start_string () noexcept {
@@ -199,6 +227,16 @@ struct structural_parser : structural_iterator {
199
227
return SUCCESS;
200
228
}
201
229
230
// Entry point for a parse: starts logging, bails out with EMPTY when
// at_end() reports nothing to parse, and otherwise opens the root scope.
//
// Note the order: logger::log_start() runs before the at_end() check, so it
// is called even on the EMPTY path.
//
// @return EMPTY when at_end() is true, otherwise the result of start_document().
+ WARN_UNUSED really_inline error_code start () {
231
+ logger::log_start ();
232
+
233
+ // If there are no structurals left, return EMPTY
234
+ if (at_end ()) { return EMPTY; }
235
+
236
+ // Push the root scope (there is always at least one scope)
237
+ return start_document ();
238
+ }
239
+
202
240
WARN_UNUSED really_inline error_code finish () {
203
241
end_document ();
204
242
parser.next_structural_index = uint32_t (current_structural + 1 - &parser.structural_indexes [0 ]);
@@ -211,29 +249,10 @@ struct structural_parser : structural_iterator {
211
249
return SUCCESS;
212
250
}
213
251
214
- really_inline void init () {
215
- log_start ();
216
- }
217
-
218
- WARN_UNUSED really_inline error_code start () {
219
- // If there are no structurals left, return EMPTY
220
- if (at_end (parser.n_structural_indexes )) {
221
- return EMPTY;
222
- }
223
-
224
- init ();
225
- // Push the root scope (there is always at least one scope)
226
- return start_document ();
227
- }
228
-
229
252
// Emits one log line for a value by forwarding to logger::log_line with this
// parser, the given type label, and two fixed string arguments.
//
// @param type  label passed through to logger::log_line as its third argument.
//
// NOTE(review): the fixed string arguments read " " in this listing; they may
// be empty strings in the real file (extraction appears to pad literals) - confirm.
really_inline void log_value (const char *type) {
230
253
logger::log_line (*this , " " , type, " " );
231
254
}
232
255
233
- static really_inline void log_start () {
234
- logger::log_start ();
235
- }
236
-
237
256
really_inline void log_start_value (const char *type) {
238
257
logger::log_line (*this , " +" , type, " " );
239
258
if (logger::LOG_ENABLED) { logger::log_depth++; }
@@ -261,11 +280,14 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
261
280
// Read first value
262
281
//
263
282
switch (parser.current_char ()) {
264
- case ' {' :
265
- SIMDJSON_TRY ( parser.start_object (false ) );
283
+ case ' {' : {
284
+ if (parser.empty_object ()) { goto document_end; }
285
+ SIMDJSON_TRY ( parser.start_object () );
266
286
goto object_begin;
267
- case ' [' :
268
- SIMDJSON_TRY ( parser.start_array (false ) );
287
+ }
288
+ case ' [' : {
289
+ if (parser.empty_array ()) { goto document_end; }
290
+ SIMDJSON_TRY ( parser.start_array () );
269
291
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
270
292
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
271
293
if (!STREAMING) {
@@ -274,14 +296,15 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
274
296
}
275
297
}
276
298
goto array_begin;
277
- case ' "' : SIMDJSON_TRY ( parser.parse_string () ); goto finish;
278
- case ' t' : SIMDJSON_TRY ( parser.parse_root_true_atom () ); goto finish;
279
- case ' f' : SIMDJSON_TRY ( parser.parse_root_false_atom () ); goto finish;
280
- case ' n' : SIMDJSON_TRY ( parser.parse_root_null_atom () ); goto finish;
299
+ }
300
+ case ' "' : SIMDJSON_TRY ( parser.parse_string () ); goto document_end;
301
+ case ' t' : SIMDJSON_TRY ( parser.parse_root_true_atom () ); goto document_end;
302
+ case ' f' : SIMDJSON_TRY ( parser.parse_root_false_atom () ); goto document_end;
303
+ case ' n' : SIMDJSON_TRY ( parser.parse_root_null_atom () ); goto document_end;
281
304
case ' -' :
282
305
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
283
306
case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
284
- SIMDJSON_TRY ( parser.parse_root_number () ); goto finish ;
307
+ SIMDJSON_TRY ( parser.parse_root_number () ); goto document_end ;
285
308
default :
286
309
parser.log_error (" Document starts with a non-value character" );
287
310
return TAPE_ERROR;
@@ -291,25 +314,27 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
291
314
// Object parser states
292
315
//
293
316
object_begin:
294
- switch (parser.advance_char ()) {
295
- case ' "' : {
296
- parser.increment_count ();
297
- SIMDJSON_TRY ( parser.parse_string (true ) );
298
- goto object_key_state;
299
- }
300
- case ' }' :
301
- parser.end_object ();
302
- goto scope_end;
303
- default :
317
+ if (parser.advance_char () != ' "' ) {
304
318
parser.log_error (" Object does not start with a key" );
305
319
return TAPE_ERROR;
306
320
}
321
+ parser.increment_count ();
322
+ SIMDJSON_TRY ( parser.parse_string (true ) );
323
+ goto object_field;
307
324
308
- object_key_state :
325
+ object_field :
309
326
if (unlikely ( parser.advance_char () != ' :' )) { parser.log_error (" Missing colon after key in object" ); return TAPE_ERROR; }
310
327
switch (parser.advance_char ()) {
311
- case ' {' : SIMDJSON_TRY ( parser.start_object (false ) ); goto object_begin;
312
- case ' [' : SIMDJSON_TRY ( parser.start_array (false ) ); goto array_begin;
328
+ case ' {' : {
329
+ if (parser.empty_object ()) { break ; };
330
+ SIMDJSON_TRY ( parser.start_object () );
331
+ goto object_begin;
332
+ }
333
+ case ' [' : {
334
+ if (parser.empty_array ()) { break ; };
335
+ SIMDJSON_TRY ( parser.start_array () );
336
+ goto array_begin;
337
+ }
313
338
case ' "' : SIMDJSON_TRY ( parser.parse_string () ); break ;
314
339
case ' t' : SIMDJSON_TRY ( parser.parse_true_atom () ); break ;
315
340
case ' f' : SIMDJSON_TRY ( parser.parse_false_atom () ); break ;
@@ -329,7 +354,7 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
329
354
parser.increment_count ();
330
355
if (unlikely ( parser.advance_char () != ' "' )) { parser.log_error (" Key string missing at beginning of field in object" ); return TAPE_ERROR; }
331
356
SIMDJSON_TRY ( parser.parse_string (true ) );
332
- goto object_key_state ;
357
+ goto object_field ;
333
358
case ' }' :
334
359
parser.end_object ();
335
360
goto scope_end;
@@ -339,25 +364,28 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
339
364
}
340
365
341
366
scope_end:
342
- if (parser.depth == 1 ) { goto finish ; }
367
+ if (parser.depth == 0 ) { goto document_end ; }
343
368
if (parser.parser .is_array [parser.depth ]) { goto array_continue; }
344
369
goto object_continue;
345
370
346
371
//
347
372
// Array parser states
348
373
//
349
374
array_begin:
350
- if (parser.peek_next_char () == ' ]' ) {
351
- parser.advance_char ();
352
- parser.end_array ();
353
- goto scope_end;
354
- }
355
375
parser.increment_count ();
356
376
357
- main_array_switch :
377
+ array_value :
358
378
switch (parser.advance_char ()) {
359
- case ' {' : SIMDJSON_TRY ( parser.start_object (true ) ); goto object_begin;
360
- case ' [' : SIMDJSON_TRY ( parser.start_array (true ) ); goto array_begin;
379
+ case ' {' : {
380
+ if (parser.empty_object ()) { break ; };
381
+ SIMDJSON_TRY ( parser.start_object () );
382
+ goto object_begin;
383
+ }
384
+ case ' [' : {
385
+ if (parser.empty_array ()) { break ; };
386
+ SIMDJSON_TRY ( parser.start_array () );
387
+ goto array_begin;
388
+ }
361
389
case ' "' : SIMDJSON_TRY ( parser.parse_string () ); break ;
362
390
case ' t' : SIMDJSON_TRY ( parser.parse_true_atom () ); break ;
363
391
case ' f' : SIMDJSON_TRY ( parser.parse_false_atom () ); break ;
@@ -375,7 +403,7 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
375
403
switch (parser.advance_char ()) {
376
404
case ' ,' :
377
405
parser.increment_count ();
378
- goto main_array_switch ;
406
+ goto array_value ;
379
407
case ' ]' :
380
408
parser.end_array ();
381
409
goto scope_end;
@@ -384,9 +412,10 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
384
412
return TAPE_ERROR;
385
413
}
386
414
387
- finish :
415
+ document_end :
388
416
return parser.finish ();
389
- }
417
+
418
+ } // parse_structurals()
390
419
391
420
} // namespace stage2
392
421
} // namespace SIMDJSON_IMPLEMENTATION
0 commit comments