-
Notifications
You must be signed in to change notification settings - Fork 38
Add SimdJsonParser2 based on bit index #60
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
49 changes: 49 additions & 0 deletions
src/jmh/java/org/simdjson/ParseAndSelectFixPathBenchMark.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| package org.simdjson; | ||
|
|
||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.util.concurrent.TimeUnit; | ||
|
|
||
| import org.openjdk.jmh.annotations.*; | ||
|
|
||
| import com.fasterxml.jackson.databind.JsonNode; | ||
| import com.fasterxml.jackson.databind.ObjectMapper; | ||
|
|
||
| @State(Scope.Benchmark) | ||
| @BenchmarkMode(Mode.Throughput) | ||
| @OutputTimeUnit(TimeUnit.SECONDS) | ||
| public class ParseAndSelectFixPathBenchMark { | ||
| @Param({"/twitter.json"}) | ||
| String fileName; | ||
| private byte[] buffer; | ||
| private final SimdJsonParser simdJsonParser = new SimdJsonParser(); | ||
| private final ObjectMapper jacksonObjectMapper = new ObjectMapper(); | ||
| private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath( | ||
| "statuses.0.user.default_profile", "statuses.0.user.screen_name", | ||
| "statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description", | ||
| "statuses.1.user.default_profile", "statuses.1.user.screen_name", | ||
| "statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description"); | ||
|
|
||
| @Setup(Level.Trial) | ||
| public void setup() throws IOException { | ||
| try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) { | ||
| buffer = is.readAllBytes(); | ||
| } | ||
| System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public JsonValue parseMultiValuesForFixPaths_SimdJson() { | ||
| return simdJsonParser.parse(buffer, buffer.length); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public String[] parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() { | ||
| return simdJsonParserWithFixPath.parse(buffer, buffer.length); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public JsonNode parseMultiValuesForFixPaths_Jackson() throws IOException { | ||
| return jacksonObjectMapper.readTree(buffer); | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
219 changes: 219 additions & 0 deletions
src/main/java/org/simdjson/SimdJsonParserWithFixPath.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,219 @@ | ||
| package org.simdjson; | ||
|
|
||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
|
|
||
| import lombok.Data; | ||
| import lombok.RequiredArgsConstructor; | ||
|
|
||
| public class SimdJsonParserWithFixPath { | ||
|
|
||
| @Data | ||
| @RequiredArgsConstructor | ||
| static class JsonNode { | ||
| private long version = 0; | ||
| private boolean isLeaf = false; | ||
| private final String name; | ||
| private String value = null; | ||
| private JsonNode parent = null; | ||
| private Map<String, JsonNode> children = new HashMap<>(); | ||
| private int start = -1; | ||
| private int end = -1; | ||
| } | ||
|
|
||
| private final SimdJsonParser parser; | ||
| private BitIndexes bitIndexes; | ||
| private final JsonNode root = new JsonNode(null); | ||
| private final JsonNode[] row; | ||
| private final String[] result; | ||
| private final String[] emptyResult; | ||
| private JsonNode ptr; | ||
| private byte[] buffer; | ||
| private final int expectParseCols; | ||
| // every time json string is processed, currentVersion will be incremented by 1 | ||
| private long currentVersion = 0; | ||
|
|
||
| public SimdJsonParserWithFixPath(String... args) { | ||
| parser = new SimdJsonParser(); | ||
| expectParseCols = args.length; | ||
| row = new JsonNode[expectParseCols]; | ||
| result = new String[expectParseCols]; | ||
| emptyResult = new String[expectParseCols]; | ||
| for (int i = 0; i < args.length; i++) { | ||
| emptyResult[i] = null; | ||
| } | ||
| for (int i = 0; i < expectParseCols; i++) { | ||
| JsonNode cur = root; | ||
| String[] paths = args[i].split("\\."); | ||
| for (int j = 0; j < paths.length; j++) { | ||
| if (!cur.getChildren().containsKey(paths[j])) { | ||
| JsonNode child = new JsonNode(paths[j]); | ||
| cur.getChildren().put(paths[j], child); | ||
| child.setParent(cur); | ||
| } | ||
| cur = cur.getChildren().get(paths[j]); | ||
| } | ||
| cur.setLeaf(true); | ||
| row[i] = cur; | ||
| } | ||
|
|
||
| } | ||
|
|
||
| public String[] parse(byte[] buffer, int len) { | ||
| this.bitIndexes = parser.buildBitIndex(buffer, len); | ||
| if (buffer == null || buffer.length == 0) { | ||
| return emptyResult; | ||
| } | ||
| this.currentVersion++; | ||
| this.ptr = root; | ||
| this.buffer = buffer; | ||
|
|
||
| switch (buffer[bitIndexes.peek()]) { | ||
| case '{' -> { | ||
| parseMap(); | ||
| } | ||
| case '[' -> { | ||
| parseList(); | ||
| } | ||
| default -> { | ||
| throw new RuntimeException("invalid json format"); | ||
| } | ||
| } | ||
| return getResult(); | ||
| } | ||
|
|
||
| private String parseValue() { | ||
| int start = bitIndexes.advance(); | ||
| int next = bitIndexes.peek(); | ||
| String field = new String(buffer, start, next - start).trim(); | ||
| if ("null".equalsIgnoreCase(field)) { | ||
| return null; | ||
| } | ||
| // field type is string or type is decimal | ||
| if (field.startsWith("\"")) { | ||
| field = field.substring(1, field.length() - 1); | ||
| } | ||
| return field; | ||
| } | ||
|
|
||
| private void parseElement(String expectFieldName) { | ||
| // if expectFieldName is null, parent is map, else is list | ||
| if (expectFieldName == null) { | ||
| expectFieldName = parseValue(); | ||
| bitIndexes.advance(); // skip : | ||
| } | ||
| if (!ptr.getChildren().containsKey(expectFieldName)) { | ||
| skip(false); | ||
| return; | ||
| } | ||
| ptr = ptr.getChildren().get(expectFieldName); | ||
| switch (buffer[bitIndexes.peek()]) { | ||
| case '{' -> { | ||
| parseMap(); | ||
| } | ||
| case '[' -> { | ||
| parseList(); | ||
| } | ||
| default -> { | ||
| ptr.setValue(skip(true)); | ||
| ptr.setVersion(currentVersion); | ||
| } | ||
| } | ||
| ptr = ptr.getParent(); | ||
| } | ||
|
|
||
| private void parseMap() { | ||
| if (ptr.getChildren() == null) { | ||
| ptr.setValue(skip(true)); | ||
| ptr.setVersion(currentVersion); | ||
| return; | ||
| } | ||
| ptr.setStart(bitIndexes.peek()); | ||
| bitIndexes.advance(); | ||
| while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') { | ||
| parseElement(null); | ||
| if (buffer[bitIndexes.peek()] == ',') { | ||
| bitIndexes.advance(); | ||
| } | ||
| } | ||
| ptr.setEnd(bitIndexes.peek()); | ||
| if (ptr.isLeaf()) { | ||
| ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | ||
| ptr.setVersion(currentVersion); | ||
| } | ||
| bitIndexes.advance(); | ||
| } | ||
|
|
||
| private void parseList() { | ||
| if (ptr.getChildren() == null) { | ||
| ptr.setValue(skip(true)); | ||
| ptr.setVersion(currentVersion); | ||
| return; | ||
| } | ||
| ptr.setStart(bitIndexes.peek()); | ||
| bitIndexes.advance(); | ||
| int i = 0; | ||
| while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') { | ||
| parseElement("" + i); | ||
| if (buffer[bitIndexes.peek()] == ',') { | ||
| bitIndexes.advance(); | ||
| } | ||
| i++; | ||
| } | ||
| ptr.setEnd(bitIndexes.peek()); | ||
| if (ptr.isLeaf()) { | ||
| ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | ||
| ptr.setVersion(currentVersion); | ||
| } | ||
| bitIndexes.advance(); | ||
| } | ||
|
|
||
| private String skip(boolean retainValue) { | ||
| int i = 0; | ||
| int start = retainValue ? bitIndexes.peek() : 0; | ||
| switch (buffer[bitIndexes.peek()]) { | ||
| case '{' -> { | ||
| i++; | ||
| while (i > 0) { | ||
| bitIndexes.advance(); | ||
| if (buffer[bitIndexes.peek()] == '{') { | ||
| i++; | ||
| } else if (buffer[bitIndexes.peek()] == '}') { | ||
| i--; | ||
| } | ||
| } | ||
| int end = bitIndexes.peek(); | ||
| bitIndexes.advance(); | ||
| return retainValue ? new String(buffer, start, end - start + 1) : null; | ||
| } | ||
| case '[' -> { | ||
| i++; | ||
| while (i > 0) { | ||
| bitIndexes.advance(); | ||
| if (buffer[bitIndexes.peek()] == '[') { | ||
| i++; | ||
| } else if (buffer[bitIndexes.peek()] == ']') { | ||
| i--; | ||
| } | ||
| } | ||
| int end = bitIndexes.peek(); | ||
| bitIndexes.advance(); | ||
| return retainValue ? new String(buffer, start, end - start + 1) : null; | ||
| } | ||
| default -> { | ||
| return parseValue(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private String[] getResult() { | ||
| for (int i = 0; i < expectParseCols; i++) { | ||
| if (row[i].getVersion() < currentVersion) { | ||
| result[i] = null; | ||
| continue; | ||
| } | ||
| result[i] = row[i].getValue(); | ||
| } | ||
| return result; | ||
| } | ||
| } |
33 changes: 33 additions & 0 deletions
src/test/java/org/simdjson/JsonMultiValueParsingTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| package org.simdjson; | ||
|
|
||
| import static org.simdjson.testutils.SimdJsonAssertions.assertThat; | ||
| import static org.simdjson.testutils.TestUtils.toUtf8; | ||
|
|
||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| public class JsonMultiValueParsingTest { | ||
| @Test | ||
| public void testParseMultiValue() { | ||
| byte[] json = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}"); | ||
| SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5"); | ||
| String[] result = parser.parse(json, json.length); | ||
| assertThat(result[0]).isEqualTo("value2"); | ||
| assertThat(result[1]).isEqualTo("3"); | ||
| assertThat(result[2]).isEqualTo("[\"value4\",\"value5\"]"); | ||
| assertThat(result[3]).isEqualTo("value4"); | ||
| assertThat(result[4]).isEqualTo(null); | ||
| } | ||
|
|
||
| @Test | ||
| public void testNonAsciiCharacters() { | ||
| byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}"); | ||
| SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀"); | ||
| // when | ||
| String[] result = parser.parse(json, json.length); | ||
| // then | ||
| assertThat(result[0]).isEqualTo("1"); | ||
| assertThat(result[1]).isEqualTo("2"); | ||
| assertThat(result[2]).isEqualTo("3"); | ||
| assertThat(result[3]).isEqualTo("4"); | ||
| } | ||
| } |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.