From f2779b6cec8d87944d361d4fec837d99731414fe Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月13日 23:22:16 +0900 Subject: [PATCH 01/27] initial commit after `asinit .` --- assembly/index.ts | 5 +++++ assembly/tsconfig.json | 6 ++++++ index.js | 6 ++++++ package-lock.json | 37 +++++++++++++++++++++++++++++++++++++ package.json | 6 +++++- 5 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 assembly/index.ts create mode 100644 assembly/tsconfig.json create mode 100644 index.js diff --git a/assembly/index.ts b/assembly/index.ts new file mode 100644 index 00000000..66a9aafd --- /dev/null +++ b/assembly/index.ts @@ -0,0 +1,5 @@ +// The entry file of your WebAssembly module. + +export function add(a: i32, b: i32): i32 { + return a + b; +} diff --git a/assembly/tsconfig.json b/assembly/tsconfig.json new file mode 100644 index 00000000..c614e5c8 --- /dev/null +++ b/assembly/tsconfig.json @@ -0,0 +1,6 @@ +{ + "extends": "../node_modules/assemblyscript/std/assembly.json", + "include": [ + "./**/*.ts" + ] +} \ No newline at end of file diff --git a/index.js b/index.js new file mode 100644 index 00000000..4ef4c454 --- /dev/null +++ b/index.js @@ -0,0 +1,6 @@ +const fs = require("fs"); +const compiled = new WebAssembly.Module(fs.readFileSync(__dirname + "/build/as/optimized.wasm")); +const imports = {}; +Object.defineProperty(module, "exports", { + get: () => new WebAssembly.Instance(compiled, imports).exports, +}); diff --git a/package-lock.json b/package-lock.json index 9db0941f..e8767af2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -33,6 +33,12 @@ "acorn": ">= 6" } }, + "@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA=", + "dev": true + }, "@types/mocha": { "version": "5.2.6", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-5.2.6.tgz", @@ -486,6 +492,19 @@ "minimalistic-assert": "^1.0.0" } }, + 
"assemblyscript": { + "version": "github:AssemblyScript/assemblyscript#36040d5b5312f19a025782b5e36663823494c2f3", + "from": "github:AssemblyScript/assemblyscript", + "dev": true, + "requires": { + "@protobufjs/utf8": "^1.1.0", + "binaryen": "77.0.0-nightly.20190407", + "glob": "^7.1.3", + "long": "^4.0.0", + "opencollective-postinstall": "^2.0.0", + "source-map-support": "^0.5.11" + } + }, "assert": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/assert/-/assert-2.0.0.tgz", @@ -667,6 +686,12 @@ "integrity": "sha512-Un7MIEDdUC5gNpcGDV97op1Ywk748MpHcFTHoYs6qnj1Z3j7I53VG3nwZhKzoBZmbdRNnb6WRdFlwl7tSDuZGw==", "dev": true }, + "binaryen": { + "version": "77.0.0-nightly.20190407", + "resolved": "https://registry.npmjs.org/binaryen/-/binaryen-77.0.0-nightly.20190407.tgz", + "integrity": "sha512-1mxYNvQ0xywMe582K7V6Vo2zzhZZxMTeGHH8aE/+/AND8f64D8Q1GThVY3RVRwGY/4p+p95ccw9Xbw2ovFXRIg==", + "dev": true + }, "blob": { "version": "0.0.5", "resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz", @@ -3392,6 +3417,12 @@ "object.assign": "^4.1.0" } }, + "long": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz", + "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==", + "dev": true + }, "loud-rejection": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/loud-rejection/-/loud-rejection-1.6.0.tgz", @@ -4383,6 +4414,12 @@ "mimic-fn": "^1.0.0" } }, + "opencollective-postinstall": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.2.tgz", + "integrity": "sha512-pVOEP16TrAO2/fjej1IdOyupJY8KDUM1CvsaScRbw6oddvpQoOfGk4ywha0HKKVAD6RkW4x6Q+tNBwhf3Bgpuw==", + "dev": true + }, "optimist": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", diff --git a/package.json b/package.json index f88772c6..23b5a9bf 100644 --- a/package.json +++ b/package.json @@ 
-23,7 +23,10 @@ "format": "prettier --write 'src/**/*.ts' 'test/**/*.ts'", "profile:encode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", - "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts" + "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", + "asbuild:untouched": "asc assembly/index.ts -b build/as/untouched.wasm -t build/as/untouched.wat --sourceMap --validate --debug", + "asbuild:optimized": "asc assembly/index.ts -b build/as/optimized.wasm -t build/as/optimized.wat --sourceMap --validate --optimize", + "asbuild": "npm run asbuild:untouched && npm run asbuild:optimized" }, "repository": { "type": "git", @@ -46,6 +49,7 @@ "@types/node": "^11.13.10", "@typescript-eslint/eslint-plugin": "^1.9.0", "@typescript-eslint/parser": "^1.9.0", + "assemblyscript": "github:AssemblyScript/assemblyscript", "assert": "^2.0.0", "core-js": "^3.0.1", "eslint": "^5.16.0", From 80a1bd4a0fae89ddccda3b290e960983c0e158b9 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月14日 23:35:01 +0900 Subject: [PATCH 02/27] implement utf8Encode in AssemblyScript (but slow) --- .eslintrc.js | 3 +-- assembly/index.ts | 46 ++++++++++++++++++++++++++++++++++++++++++++-- index.js | 6 ------ package.json | 4 ++-- src/Decoder.ts | 13 ++++++++++--- src/utils/utf8.ts | 18 +++++++++--------- tsconfig.json | 3 ++- wasmModule.ts | 27 +++++++++++++++++++++++++++ 8 files changed, 95 insertions(+), 25 deletions(-) delete mode 100644 index.js create mode 100644 wasmModule.ts diff --git a/.eslintrc.js b/.eslintrc.js index 2735e963..23842c50 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -21,7 +21,7 @@ module.exports = { "no-var": "warn", 
"valid-typeof": "warn", // "bigint" is not yet supported "no-return-await": "warn", - "prefer-const": "warn", + // "prefer-const": "warn", // TODO: AssemblyScript has different semantics. "guard-for-in": "warn", "curly": "warn", @@ -34,7 +34,6 @@ module.exports = { "@typescript-eslint/prefer-includes": "warn", "@typescript-eslint/prefer-string-starts-ends-with": "warn", "@typescript-eslint/no-use-before-define": "warn", - "@typescript-eslint/restrict-plus-operands": "error", "@typescript-eslint/await-thenable": "error", "@typescript-eslint/no-for-in-array": "error", diff --git a/assembly/index.ts b/assembly/index.ts index 66a9aafd..34d6ff1d 100644 --- a/assembly/index.ts +++ b/assembly/index.ts @@ -1,5 +1,47 @@ // The entry file of your WebAssembly module. -export function add(a: i32, b: i32): i32 { - return a + b; +// memory is assumed: +// [input][output] + +export function utf8ToUtf16(byteLength: i32, outputOffset: i32): i32 { + let inputOffset: i32 = 0; + while (inputOffset < byteLength) { + let byte1: u16 = load(inputOffset++); + if ((byte1 & 0x80) === 0) { + // 1 byte + store(outputOffset, byte1); + outputOffset += 2; + } else if ((byte1 & 0xe0) === 0xc0) { + // 2 bytes + let byte2: u16 = load(inputOffset++) & 0x3f; + // FIXME: consider endians + store(outputOffset, ((byte1 & 0x1f) << 6) | byte2); + outputOffset += 2; + } else if ((byte1 & 0xf0) === 0xe0) { + // 3 bytes + let byte2: u16 = load(inputOffset++) & 0x3f; + let byte3: u16 = load(inputOffset++) & 0x3f; + store(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3); + outputOffset += 2; + } else if ((byte1 & 0xf8) === 0xf0) { + // 4 bytes + let byte2 = load(inputOffset++) & 0x3f; + let byte3 = load(inputOffset++) & 0x3f; + let byte4 = load(inputOffset++) & 0x3f; + let codepoint: i32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4; + if (codepoint> 0xffff) { + codepoint -= 0x10000; + store(outputOffset, ((codepoint>>> 10) & 0x3ff) | 0xd800); + outputOffset += 2; + 
codepoint = 0xdc00 | (codepoint & 0x3ff); + } + store(outputOffset, codepoint); + outputOffset += 2; + } else { + // invalid UTF-8 + store(outputOffset++, byte1); + outputOffset += 2; + } + } + return outputOffset; } diff --git a/index.js b/index.js deleted file mode 100644 index 4ef4c454..00000000 --- a/index.js +++ /dev/null @@ -1,6 +0,0 @@ -const fs = require("fs"); -const compiled = new WebAssembly.Module(fs.readFileSync(__dirname + "/build/as/optimized.wasm")); -const imports = {}; -Object.defineProperty(module, "exports", { - get: () => new WebAssembly.Instance(compiled, imports).exports, -}); diff --git a/package.json b/package.json index 23b5a9bf..fbf07735 100644 --- a/package.json +++ b/package.json @@ -24,8 +24,8 @@ "profile:encode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", - "asbuild:untouched": "asc assembly/index.ts -b build/as/untouched.wasm -t build/as/untouched.wat --sourceMap --validate --debug", - "asbuild:optimized": "asc assembly/index.ts -b build/as/optimized.wasm -t build/as/optimized.wat --sourceMap --validate --optimize", + "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug", + "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate --optimize", "asbuild": "npm run asbuild:untouched && npm run asbuild:optimized" }, "repository": { diff --git a/src/Decoder.ts b/src/Decoder.ts index 9250f195..03cd2376 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -2,6 +2,7 @@ import { prettyByte } from 
"./utils/prettyByte"; import { ExtensionCodec } from "./ExtensionCodec"; import { getInt64, getUint64 } from "./utils/int"; import { utf8Decode } from "./utils/utf8"; +import { utf8Decode2 } from "../wasmModule"; import { createDataView, ensureUint8Array } from "./utils/typedArrays"; enum State { @@ -29,6 +30,7 @@ type StackState = StackArrayState | StackMapState; const HEAD_BYTE_REQUIRED = -1; const EMPTY_VIEW = new DataView(new ArrayBuffer(0)); +const EMPTY_BYTES = new Uint8Array(EMPTY_VIEW.buffer); // IE11: Hack to support IE11. // IE11: Drop this hack and just use RangeError when IE11 is obsolete. @@ -49,14 +51,16 @@ export class Decoder { totalPos = 0; pos = 0; - view: DataView = EMPTY_VIEW; + view = EMPTY_VIEW; + bytes = EMPTY_BYTES; headByte = HEAD_BYTE_REQUIRED; readonly stack: Array = []; constructor(readonly extensionCodec = ExtensionCodec.defaultCodec) {} setBuffer(buffer: ArrayLike | Uint8Array): void { - this.view = createDataView(buffer); + this.bytes = ensureUint8Array(buffer); + this.view = createDataView(this.bytes); this.pos = 0; } @@ -371,7 +375,10 @@ export class Decoder { } decodeUtf8String(byteLength: number, headOffset: number): string { - const object = utf8Decode(this.view, this.pos + headOffset, byteLength); + if (this.bytes.byteLength < this.pos + headOffset + byteLength) { + throw MORE_DATA; + } + const object = utf8Decode2(this.bytes, this.pos + headOffset, byteLength); this.pos += headOffset + byteLength; return object; } diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index 972a531f..7d4bf43d 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -85,28 +85,28 @@ export function utf8Encode(str: string, view: DataView, offset: number): void { } } -export function utf8Decode(view: DataView, offset: number, byteLength: number): string { +export function utf8Decode(bytes: Uint8Array, offset: number, byteLength: number): string { const out: Array = []; const end = offset + byteLength; while (offset < end) { - const byte1 = 
view.getUint8(offset++); + const byte1 = bytes[offset++]; if ((byte1 & 0x80) === 0) { // 1 byte out.push(byte1); } else if ((byte1 & 0xe0) === 0xc0) { // 2 bytes - const byte2 = view.getUint8(offset++) & 0x3f; + const byte2 = bytes[offset++] & 0x3f; out.push(((byte1 & 0x1f) << 6) | byte2); } else if ((byte1 & 0xf0) === 0xe0) { // 3 bytes - const byte2 = view.getUint8(offset++) & 0x3f; - const byte3 = view.getUint8(offset++) & 0x3f; + const byte2 = bytes[offset++] & 0x3f; + const byte3 = bytes[offset++] & 0x3f; out.push(((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3); } else if ((byte1 & 0xf8) === 0xf0) { // 4 bytes - const byte2 = view.getUint8(offset++) & 0x3f; - const byte3 = view.getUint8(offset++) & 0x3f; - const byte4 = view.getUint8(offset++) & 0x3f; + const byte2 = bytes[offset++] & 0x3f; + const byte3 = bytes[offset++] & 0x3f; + const byte4 = bytes[offset++] & 0x3f; let codepoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4; if (codepoint> 0xffff) { codepoint -= 0x10000; @@ -115,7 +115,7 @@ export function utf8Decode(view: DataView, offset: number, byteLength: number): } out.push(codepoint); } else { - throw new Error(`Invalid UTF-8 byte ${prettyByte(byte1)} at ${offset}`); + out.push(byte1); } } return String.fromCharCode(...out); diff --git a/tsconfig.json b/tsconfig.json index fb5d4ef1..cae8370d 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -58,5 +58,6 @@ /* Experimental Options */ // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. 
*/ - } + }, + "exclude": ["assembly"] } diff --git a/wasmModule.ts b/wasmModule.ts new file mode 100644 index 00000000..b03faf5a --- /dev/null +++ b/wasmModule.ts @@ -0,0 +1,27 @@ +import fs from "fs"; + +declare var WebAssembly: any; + +const wasmModule = new WebAssembly.Module(fs.readFileSync(__dirname + "/build/wasm/optimized.wasm")); +const wasmInstance = new WebAssembly.Instance(wasmModule); +let instanceMemory = new Uint8Array(wasmInstance.exports.memory.buffer); + +export function utf8Decode2(bytes: Uint8Array, offset: number, byteLength: number): string { + const workingMemorySize = bytes.length * 3; // input(utf8) + output(utf16) + if (instanceMemory.length < workingMemorySize) { + const page = Math.ceil((workingMemorySize - instanceMemory.length) / 0x10000); + wasmInstance.exports.memory.grow(page); + instanceMemory = new Uint8Array(wasmInstance.exports.memory.buffer); + } + + instanceMemory.set(bytes.subarray(offset, offset + byteLength)); + // console.log(instanceMemory.subarray(0, 10)); + + const outputStart = Math.ceil(byteLength / 2) * 2; + const outputEnd = wasmInstance.exports.utf8ToUtf16(byteLength, outputStart); + const utf16array = new Uint16Array(wasmInstance.exports.memory.buffer, outputStart, (outputEnd - outputStart) / 2); + // console.log([byteLength, outputStart, outputEnd]); + // console.log(instanceMemory.subarray(0, 10)); + // console.log(utf16array); + return String.fromCharCode(...utf16array); +} From be528f3e92517360ac0b0332dcbf33b53f71f4dc Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月15日 09:12:46 +0900 Subject: [PATCH 03/27] asbuild -O3; add benchmark; USE_WASM=true --- benchmark/string.ts | 21 +++++++++++++++++++++ package.json | 2 +- src/Decoder.ts | 10 +++++++++- 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 benchmark/string.ts diff --git a/benchmark/string.ts b/benchmark/string.ts new file mode 100644 index 00000000..ebf03147 --- /dev/null +++ b/benchmark/string.ts @@ -0,0 +1,21 @@ +import 
{ encode, decode } from "../src"; + +const data = "Hello, 🌏\n".repeat(1000); + +// warm up +const encoded = encode(data); +decode(encoded); + +// run + +console.time("encode"); +for (let i = 0; i < 10000; i++) { + encode(data); +} +console.timeEnd("encode"); + +console.time("decode"); +for (let i = 0; i < 10000; i++) { + decode(encoded); +} +console.timeEnd("decode"); diff --git a/package.json b/package.json index fbf07735..371dc4c0 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug", - "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate --optimize", + "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3", "asbuild": "npm run asbuild:untouched && npm run asbuild:optimized" }, "repository": { diff --git a/src/Decoder.ts b/src/Decoder.ts index 03cd2376..b249e1df 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -47,6 +47,8 @@ export const DataViewIndexOutOfBoundsError: typeof Error = (() => { const MORE_DATA = new DataViewIndexOutOfBoundsError("Insufficient data"); +const USE_WASM = process.env.USE_WASM === "true"; + export class Decoder { totalPos = 0; pos = 0; @@ -378,7 +380,13 @@ export class Decoder { if (this.bytes.byteLength < this.pos + headOffset + byteLength) { throw MORE_DATA; } - const object = utf8Decode2(this.bytes, this.pos + headOffset, byteLength); + + let object: string; + if (USE_WASM) { + object = utf8Decode2(this.bytes, this.pos + headOffset, byteLength); + } else { + object = utf8Decode(this.bytes, 
this.pos + headOffset, byteLength); + } this.pos += headOffset + byteLength; return object; } From d9bf5af834133cd136a055cb6d5f1f941673f1bc Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月15日 10:36:57 +0900 Subject: [PATCH 04/27] fix misuse of load() --- assembly/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assembly/index.ts b/assembly/index.ts index 34d6ff1d..e62a1565 100644 --- a/assembly/index.ts +++ b/assembly/index.ts @@ -19,8 +19,8 @@ export function utf8ToUtf16(byteLength: i32, outputOffset: i32): i32 { outputOffset += 2; } else if ((byte1 & 0xf0) === 0xe0) { // 3 bytes - let byte2: u16 = load(inputOffset++) & 0x3f; - let byte3: u16 = load(inputOffset++) & 0x3f; + let byte2: u16 = load(inputOffset++) & 0x3f; + let byte3: u16 = load(inputOffset++) & 0x3f; store(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3); outputOffset += 2; } else if ((byte1 & 0xf8) === 0xf0) { // 4 bytes From a4903ecadd4b85c7f989b166073e94f425fbd2c7 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月15日 11:09:49 +0900 Subject: [PATCH 05/27] use String.fromCharCode.apply() in WASM ver. 
--- benchmark/string.ts | 6 +++--- wasmModule.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmark/string.ts b/benchmark/string.ts index ebf03147..b47613d6 100644 --- a/benchmark/string.ts +++ b/benchmark/string.ts @@ -1,6 +1,6 @@ import { encode, decode } from "../src"; -const data = "Hello, 🌏\n".repeat(1000); +const data = "Hello, 🌏\n".repeat(10000); // warm up const encoded = encode(data); @@ -9,13 +9,13 @@ decode(encoded); // run console.time("encode"); -for (let i = 0; i < 10000; i++) { +for (let i = 0; i < 1000; i++) { encode(data); } console.timeEnd("encode"); console.time("decode"); -for (let i = 0; i < 10000; i++) { +for (let i = 0; i < 1000; i++) { decode(encoded); } console.timeEnd("decode"); diff --git a/wasmModule.ts b/wasmModule.ts index b03faf5a..6359e7e2 100644 --- a/wasmModule.ts +++ b/wasmModule.ts @@ -23,5 +23,5 @@ export function utf8Decode2(bytes: Uint8Array, offset: number, byteLength: numbe // console.log([byteLength, outputStart, outputEnd]); // console.log(instanceMemory.subarray(0, 10)); // console.log(utf16array); - return String.fromCharCode(...utf16array); + return String.fromCharCode.apply(String, utf16array as any); } From 6c5545262d7b5bbce947259f7c06dcb2dc6edada Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月16日 09:47:50 +0900 Subject: [PATCH 06/27] re-structured wasm modules --- package-lock.json | 9 ++++++-- package.json | 13 ++++++----- src/Decoder.ts | 8 +++---- src/wasmFunctions.ts | 51 ++++++++++++++++++++++++++++++++++++++++++++ tools/pack-wasm.ts | 25 ++++++++++++++++++++++ wasmModule.ts | 27 ----------------------- 6 files changed, 94 insertions(+), 39 deletions(-) create mode 100644 src/wasmFunctions.ts create mode 100644 tools/pack-wasm.ts delete mode 100644 wasmModule.ts diff --git a/package-lock.json b/package-lock.json index e8767af2..491fd726 100644 --- a/package-lock.json +++ b/package-lock.json @@ -39,6 +39,12 @@ "integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA=", "dev": true 
}, + "@types/base64-js": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/@types/base64-js/-/base64-js-1.2.5.tgz", + "integrity": "sha1-WCskdhaabLpGCiFNR2x0REHYc9U=", + "dev": true + }, "@types/mocha": { "version": "5.2.6", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-5.2.6.tgz", @@ -656,8 +662,7 @@ "base64-js": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.3.0.tgz", - "integrity": "sha512-ccav/yGvoa80BQDljCxsmmQ3Xvx60/UpBIij5QN21W3wBi/hhIC9OoO+KLpu9IJTS9j4DRVJ3aDDF9cMSoa2lw==", - "dev": true + "integrity": "sha512-ccav/yGvoa80BQDljCxsmmQ3Xvx60/UpBIij5QN21W3wBi/hhIC9OoO+KLpu9IJTS9j4DRVJ3aDDF9cMSoa2lw==" }, "base64id": { "version": "1.0.0", diff --git a/package.json b/package.json index 371dc4c0..0f577b5a 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "types": "./dist/index.d.ts", "scripts": { "build": "npm publish --dry-run", - "prepare": "rm -rf dist dist.* ; tsc -p tsconfig.dist.json && webpack", + "prepare": "rm -rf dist dist.* ; tsc -p tsconfig.dist.json && webpack && npm run asbuild", "prepublishOnly": "TEST_DIST=true npm run test", "clean": "rm -rf build dist dist.*", "test": "mocha 'test/**/*.test.ts'", @@ -24,9 +24,9 @@ "profile:encode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", - "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug", - "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3", - "asbuild": "npm run 
asbuild:untouched && npm run asbuild:optimized" + "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug --measure", + "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3 --measure", + "asbuild": "rm -rf build/wasm && npm run asbuild:untouched && npm run asbuild:optimized && ts-node tools/pack-wasm.ts" }, "repository": { "type": "git", @@ -45,6 +45,7 @@ "homepage": "https://msgpack.org/", "devDependencies": { "@bitjourney/check-es-version-webpack-plugin": "^1.0.2", + "@types/base64-js": "^1.2.5", "@types/mocha": "^5.2.6", "@types/node": "^11.13.10", "@typescript-eslint/eslint-plugin": "^1.9.0", @@ -75,7 +76,9 @@ "webpack": "^4.30.0", "webpack-cli": "^3.3.1" }, - "dependencies": {}, + "dependencies": { + "base64-js": "^1.3.0" + }, "files": [ "src/**/*.*", "dist/**/*.*", diff --git a/src/Decoder.ts b/src/Decoder.ts index b249e1df..063ee120 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -2,7 +2,7 @@ import { prettyByte } from "./utils/prettyByte"; import { ExtensionCodec } from "./ExtensionCodec"; import { getInt64, getUint64 } from "./utils/int"; import { utf8Decode } from "./utils/utf8"; -import { utf8Decode2 } from "../wasmModule"; +import { utf8DecodeWasm, WASM_AVAILABLE } from "./wasmFunctions"; import { createDataView, ensureUint8Array } from "./utils/typedArrays"; enum State { @@ -47,8 +47,6 @@ export const DataViewIndexOutOfBoundsError: typeof Error = (() => { const MORE_DATA = new DataViewIndexOutOfBoundsError("Insufficient data"); -const USE_WASM = process.env.USE_WASM === "true"; - export class Decoder { totalPos = 0; pos = 0; @@ -382,8 +380,8 @@ export class Decoder { } let object: string; - if (USE_WASM) { - object = utf8Decode2(this.bytes, this.pos + headOffset, byteLength); + if (WASM_AVAILABLE) { + object = utf8DecodeWasm(this.bytes, this.pos + headOffset, byteLength); } else { object = 
utf8Decode(this.bytes, this.pos + headOffset, byteLength); } diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts new file mode 100644 index 00000000..a3ec2def --- /dev/null +++ b/src/wasmFunctions.ts @@ -0,0 +1,51 @@ +let wasmModule: any; +try { + wasmModule = require("../build/wasm/optimized.wasm.js").wasmModule; +} catch { + // WebAssembly is not supported. +} + +declare var WebAssembly: any; +const WASM_MEMORY_PAGE_SIZE = 0x10000; // 64KiB + +const defaultWasmInstance = wasmModule && new WebAssembly.Instance(wasmModule); + +export const WASM_AVAILABLE = !!wasmModule && process.env.NO_WASM !== "true"; + +function copyArrayBuffer(dest: ArrayBuffer, src: Uint8Array) { + const destView = new Uint8Array(dest); + destView.set(src); +} + +export function utf8DecodeWasm( + bytes: Uint8Array, + offset: number, + byteLength: number, + wasmInstance = defaultWasmInstance, +): string { + if (!wasmInstance) { + throw new Error("No WebAssembly available"); + } + + const currentMemorySize: number = wasmInstance.exports.memory.buffer.byteLength; + const requiredMemorySize = bytes.length * 3; // input(utf8) + output(utf16) + if (currentMemorySize < requiredMemorySize) { + const page = Math.ceil((requiredMemorySize - currentMemorySize) / WASM_MEMORY_PAGE_SIZE); + wasmInstance.exports.memory.grow(page); + } + + copyArrayBuffer(wasmInstance.exports.memory.buffer, bytes.subarray(offset, offset + byteLength)); + // console.log(instanceMemory.subarray(0, 10)); + + const outputStart = Math.ceil(byteLength / Uint16Array.BYTES_PER_ELEMENT) * Uint16Array.BYTES_PER_ELEMENT; + const outputEnd = wasmInstance.exports.utf8ToUtf16(byteLength, outputStart); + const codepoints = new Uint16Array( + wasmInstance.exports.memory.buffer, + outputStart, + (outputEnd - outputStart) / Uint16Array.BYTES_PER_ELEMENT, + ); + // console.log([byteLength, outputStart, outputEnd]); + // console.log(instanceMemory.subarray(0, 10)); + // console.log(utf16array); + return String.fromCharCode.apply(String, 
codepoints as any); +} diff --git a/tools/pack-wasm.ts b/tools/pack-wasm.ts new file mode 100644 index 00000000..878dd8a4 --- /dev/null +++ b/tools/pack-wasm.ts @@ -0,0 +1,25 @@ +// pack build/wasm/*.wasm + +import fs from "fs"; +import { resolve } from "path"; +import base64 from "base64-js"; + +const artifactDir = resolve(__dirname, "../build/wasm"); +for (const basename of fs.readdirSync(artifactDir)) { + const file = resolve(artifactDir, basename); + if (!file.endsWith(".wasm")) { + continue; + } + + const blob = fs.readFileSync(file); + fs.writeFileSync( + `${file}.js`, + `// generated from ${basename} +var base64 = require("base64-js"); +module.exports.wasmModule = new WebAssembly.Module( + base64.toByteArray( + ${JSON.stringify(base64.fromByteArray(blob))} +)); +`, + ); +} diff --git a/wasmModule.ts b/wasmModule.ts deleted file mode 100644 index 6359e7e2..00000000 --- a/wasmModule.ts +++ /dev/null @@ -1,27 +0,0 @@ -import fs from "fs"; - -declare var WebAssembly: any; - -const wasmModule = new WebAssembly.Module(fs.readFileSync(__dirname + "/build/wasm/optimized.wasm")); -const wasmInstance = new WebAssembly.Instance(wasmModule); -let instanceMemory = new Uint8Array(wasmInstance.exports.memory.buffer); - -export function utf8Decode2(bytes: Uint8Array, offset: number, byteLength: number): string { - const workingMemorySize = bytes.length * 3; // input(utf8) + output(utf16) - if (instanceMemory.length < workingMemorySize) { - const page = Math.ceil((workingMemorySize - instanceMemory.length) / 0x10000); - wasmInstance.exports.memory.grow(page); - instanceMemory = new Uint8Array(wasmInstance.exports.memory.buffer); - } - - instanceMemory.set(bytes.subarray(offset, offset + byteLength)); - // console.log(instanceMemory.subarray(0, 10)); - - const outputStart = Math.ceil(byteLength / 2) * 2; - const outputEnd = wasmInstance.exports.utf8ToUtf16(byteLength, outputStart); - const utf16array = new Uint16Array(wasmInstance.exports.memory.buffer, outputStart, 
(outputEnd - outputStart) / 2); - // console.log([byteLength, outputStart, outputEnd]); - // console.log(instanceMemory.subarray(0, 10)); - // console.log(utf16array); - return String.fromCharCode.apply(String, utf16array as any); -} From 6066940fc168bb6a50752a2232b8b1b790323a69 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月16日 10:32:52 +0900 Subject: [PATCH 07/27] move use of utf8DecodeWasm to utf8.ts --- src/Decoder.ts | 8 +------- src/utils/utf8.ts | 7 +++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Decoder.ts b/src/Decoder.ts index 063ee120..5bd004a1 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -2,7 +2,6 @@ import { prettyByte } from "./utils/prettyByte"; import { ExtensionCodec } from "./ExtensionCodec"; import { getInt64, getUint64 } from "./utils/int"; import { utf8Decode } from "./utils/utf8"; -import { utf8DecodeWasm, WASM_AVAILABLE } from "./wasmFunctions"; import { createDataView, ensureUint8Array } from "./utils/typedArrays"; enum State { @@ -379,12 +378,7 @@ export class Decoder { throw MORE_DATA; } - let object: string; - if (WASM_AVAILABLE) { - object = utf8DecodeWasm(this.bytes, this.pos + headOffset, byteLength); - } else { - object = utf8Decode(this.bytes, this.pos + headOffset, byteLength); - } + const object = utf8Decode(this.bytes, this.pos + headOffset, byteLength); this.pos += headOffset + byteLength; return object; } diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index 7d4bf43d..6c40a624 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -1,4 +1,7 @@ import { prettyByte } from "./prettyByte"; +import { WASM_AVAILABLE, utf8DecodeWasm } from "../wasmFunctions"; + +const WASM_THRESHOLD = 0x100; export function utf8Count(str: string): number { const strLength = str.length; @@ -86,6 +89,10 @@ export function utf8Encode(str: string, view: DataView, offset: number): void { } export function utf8Decode(bytes: Uint8Array, offset: number, byteLength: number): string { + if (WASM_AVAILABLE && 
byteLength> WASM_THRESHOLD) { + return utf8DecodeWasm(bytes, offset, byteLength); + } + const out: Array = []; const end = offset + byteLength; while (offset < end) { From 8a663c4f1b855a7becd4f261989882b4197f0613 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月16日 16:25:17 +0900 Subject: [PATCH 08/27] use memory allocator in wasm functions --- assembly/index.ts | 45 +---------------- assembly/memory.ts | 9 ++++ assembly/utf8DecodeToUint16Array.ts | 46 ++++++++++++++++++ benchmark/string.ts | 1 + package.json | 4 +- src/utils/utf8.ts | 4 +- src/wasmFunctions.ts | 75 +++++++++++++++++------------ 7 files changed, 106 insertions(+), 78 deletions(-) create mode 100644 assembly/memory.ts create mode 100644 assembly/utf8DecodeToUint16Array.ts diff --git a/assembly/index.ts b/assembly/index.ts index e62a1565..8451e80d 100644 --- a/assembly/index.ts +++ b/assembly/index.ts @@ -2,46 +2,5 @@ // memory is assumed: // [input][output] - -export function utf8ToUtf16(byteLength: i32, outputOffset: i32): i32 { - let inputOffset: i32 = 0; - while (inputOffset < byteLength) { - let byte1: u16 = load(inputOffset++); - if ((byte1 & 0x80) === 0) { - // 1 byte - store(outputOffset, byte1); - outputOffset += 2; - } else if ((byte1 & 0xe0) === 0xc0) { - // 2 bytes - let byte2: u16 = load(inputOffset++) & 0x3f; - // FIXME: consider endians - store(outputOffset, ((byte1 & 0x1f) << 6) | byte2); - outputOffset += 2; - } else if ((byte1 & 0xf0) === 0xe0) { - // 3 bytes - let byte2: u16 = load(inputOffset++) & 0x3f; - let byte3: u16 = load(inputOffset++) & 0x3f; - store(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3); - outputOffset += 2; - } else if ((byte1 & 0xf8) === 0xf0) { - // 4 bytes - let byte2 = load(inputOffset++) & 0x3f; - let byte3 = load(inputOffset++) & 0x3f; - let byte4 = load(inputOffset++) & 0x3f; - let codepoint: i32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4; - if (codepoint> 0xffff) { - codepoint -= 0x10000; - 
store(outputOffset, ((codepoint>>> 10) & 0x3ff) | 0xd800); - outputOffset += 2; - codepoint = 0xdc00 | (codepoint & 0x3ff); - } - store(outputOffset, codepoint); - outputOffset += 2; - } else { - // invalid UTF-8 - store(outputOffset++, byte1); - outputOffset += 2; - } - } - return outputOffset; -} +export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array"; +export { malloc, free } from "./memory"; diff --git a/assembly/memory.ts b/assembly/memory.ts new file mode 100644 index 00000000..391824b8 --- /dev/null +++ b/assembly/memory.ts @@ -0,0 +1,9 @@ +import "allocator/tlsf"; + +export function malloc(size: usize): usize { + return memory.allocate(size); +} + +export function free(ptr: usize): void { + memory.free(ptr); +} diff --git a/assembly/utf8DecodeToUint16Array.ts b/assembly/utf8DecodeToUint16Array.ts new file mode 100644 index 00000000..d85efee7 --- /dev/null +++ b/assembly/utf8DecodeToUint16Array.ts @@ -0,0 +1,46 @@ +export function utf8DecodeToUint16Array(outputPtr: usize, inputPtr: usize, byteLength: usize): usize { + let inputOffset = inputPtr; + let outputOffset = outputPtr; + let inputOffsetEnd = inputOffset + byteLength; + const u16s = sizeof(); + + while (inputOffset < inputOffsetEnd) { + let byte1: u16 = load(inputOffset++); + if ((byte1 & 0x80) === 0) { + // 1 byte + store(outputOffset, byte1); + outputOffset += u16s; + } else if ((byte1 & 0xe0) === 0xc0) { + // 2 bytes + let byte2: u16 = load(inputOffset++) & 0x3f; + // FIXME: consider endians + store(outputOffset, ((byte1 & 0x1f) << 6) | byte2); + outputOffset += u16s; + } else if ((byte1 & 0xf0) === 0xe0) { + // 3 bytes + let byte2: u16 = load(inputOffset++) & 0x3f; + let byte3: u16 = load(inputOffset++) & 0x3f; + store(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3); + outputOffset += u16s; + } else if ((byte1 & 0xf8) === 0xf0) { + // 4 bytes + let byte2 = load(inputOffset++) & 0x3f; + let byte3 = load(inputOffset++) & 0x3f; + let byte4 = load(inputOffset++) & 0x3f; + let 
codepoint: i32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4; + if (codepoint> 0xffff) { + codepoint -= 0x10000; + store(outputOffset, ((codepoint>>> 10) & 0x3ff) | 0xd800); + outputOffset += u16s; + codepoint = 0xdc00 | (codepoint & 0x3ff); + } + store(outputOffset, codepoint); + outputOffset += u16s; + } else { + // invalid UTF-8 + store(outputOffset++, byte1); + outputOffset += u16s; + } + } + return (outputOffset - outputPtr) / u16s; +} diff --git a/benchmark/string.ts b/benchmark/string.ts index b47613d6..fa1e4aa1 100644 --- a/benchmark/string.ts +++ b/benchmark/string.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-console */ import { encode, decode } from "../src"; const data = "Hello, 🌏\n".repeat(10000); diff --git a/package.json b/package.json index 0f577b5a..d8d5b665 100644 --- a/package.json +++ b/package.json @@ -24,8 +24,8 @@ "profile:encode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", - "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug --measure", - "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3 --measure", + "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug", + "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3", "asbuild": "rm -rf build/wasm && npm run asbuild:untouched && npm run asbuild:optimized && ts-node 
tools/pack-wasm.ts" }, "repository": { diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index 6c40a624..a08191d7 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -1,7 +1,7 @@ import { prettyByte } from "./prettyByte"; -import { WASM_AVAILABLE, utf8DecodeWasm } from "../wasmFunctions"; +import { WASM_AVAILABLE, WASM_DEBUG, utf8DecodeWasm } from "../wasmFunctions"; -const WASM_THRESHOLD = 0x100; +const WASM_THRESHOLD = WASM_DEBUG ? 0 : 0x100; export function utf8Count(str: string): number { const strLength = str.length; diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index a3ec2def..62c8fc6a 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -1,19 +1,42 @@ +/* eslint-disable no-console */ + +// TODO: Use TypeScript built-in type +declare const WebAssembly: any; + +export const WASM_DEBUG = !!(process && process.env.WASM_DEBUG === "true"); + let wasmModule: any; try { - wasmModule = require("../build/wasm/optimized.wasm.js").wasmModule; -} catch { + if (WASM_DEBUG) { + wasmModule = require("../build/wasm/untouched.wasm.js").wasmModule; + } else { + wasmModule = require("../build/wasm/optimized.wasm.js").wasmModule; + } +} catch (e) { + if (WASM_DEBUG) { + console.error(e); + } // WebAssembly is not supported. 
} -declare var WebAssembly: any; -const WASM_MEMORY_PAGE_SIZE = 0x10000; // 64KiB +function abort(filename: number, line: number, column: number): void { + throw new Error(`abort called at ${filename}:${line}:${column}`); +} -const defaultWasmInstance = wasmModule && new WebAssembly.Instance(wasmModule); +const defaultWasmInstance = + wasmModule && + new WebAssembly.Instance(wasmModule, { + env: { + abort, + }, + }); export const WASM_AVAILABLE = !!wasmModule && process.env.NO_WASM !== "true"; -function copyArrayBuffer(dest: ArrayBuffer, src: Uint8Array) { - const destView = new Uint8Array(dest); +type pointer = number; + +function setMemory(wasm: any, destPtr: pointer, src: Uint8Array, size: number) { + const destView = new Uint8Array(wasm.exports.memory.buffer, destPtr, size); destView.set(src); } @@ -21,31 +44,21 @@ export function utf8DecodeWasm( bytes: Uint8Array, offset: number, byteLength: number, - wasmInstance = defaultWasmInstance, + wasm = defaultWasmInstance, ): string { - if (!wasmInstance) { - throw new Error("No WebAssembly available"); - } + const inputPtr: pointer = wasm.exports.malloc(byteLength); + // in worst case, the UTF-16 array uses the same as byteLength * 2 + const outputPtr: pointer = wasm.exports.malloc(byteLength * 2); + try { + setMemory(wasm, inputPtr, bytes.subarray(offset, offset + byteLength), byteLength); - const currentMemorySize: number = wasmInstance.exports.memory.buffer.byteLength; - const requiredMemorySize = bytes.length * 3; // input(utf8) + output(utf16) - if (currentMemorySize < requiredMemorySize) { - const page = Math.ceil((requiredMemorySize - currentMemorySize) / WASM_MEMORY_PAGE_SIZE); - wasmInstance.exports.memory.grow(page); - } + const outputArraySize = wasm.exports.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength); + const codepoints = new Uint16Array(wasm.exports.memory.buffer, outputPtr, outputArraySize); - copyArrayBuffer(wasmInstance.exports.memory.buffer, bytes.subarray(offset, offset + byteLength)); 
- // console.log(instanceMemory.subarray(0, 10)); - - const outputStart = Math.ceil(byteLength / Uint16Array.BYTES_PER_ELEMENT) * Uint16Array.BYTES_PER_ELEMENT; - const outputEnd = wasmInstance.exports.utf8ToUtf16(byteLength, outputStart); - const codepoints = new Uint16Array( - wasmInstance.exports.memory.buffer, - outputStart, - (outputEnd - outputStart) / Uint16Array.BYTES_PER_ELEMENT, - ); - // console.log([byteLength, outputStart, outputEnd]); - // console.log(instanceMemory.subarray(0, 10)); - // console.log(utf16array); - return String.fromCharCode.apply(String, codepoints as any); + // FIXME: split codepoints if it is too long (the maximum size depends on the JS engine, though). + return String.fromCharCode.apply(String, codepoints as any); + } finally { + wasm.exports.free(inputPtr); + wasm.exports.free(outputPtr); + } } From 01fd626ae3a5f12af27d8b0e8ca85691d4004a10 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月16日 17:22:49 +0900 Subject: [PATCH 09/27] refactor dist structure --- package.json | 15 ++++++++------- src/wasmFunctions.ts | 27 ++++++++++++++------------- tools/pack-wasm.ts | 2 +- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/package.json b/package.json index d8d5b665..6d7a6dd0 100644 --- a/package.json +++ b/package.json @@ -7,9 +7,9 @@ "types": "./dist/index.d.ts", "scripts": { "build": "npm publish --dry-run", - "prepare": "rm -rf dist dist.* ; tsc -p tsconfig.dist.json && webpack && npm run asbuild", + "prepare": "npm run clean && npm run asbuild && tsc -p tsconfig.dist.json && webpack", "prepublishOnly": "TEST_DIST=true npm run test", - "clean": "rm -rf build dist dist.*", + "clean": "rimraf build dist dist.*", "test": "mocha 'test/**/*.test.ts'", "test:cover": "npx nyc mocha 'test/**/*.test.ts'", "test:browser": "karma start --single-run", @@ -21,12 +21,12 @@ "lint:fix": "eslint --fix --ext .ts src test && npm run format", "lint:print-config": "eslint --print-config .eslintrc.js", "format": "prettier 
--write 'src/**/*.ts' 'test/**/*.ts'", - "profile:encode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", - "profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", + "profile:encode": "rimraf isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", + "profile:decode": "rimraf isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", - "asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug", - "asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3", - "asbuild": "rm -rf build/wasm && npm run asbuild:untouched && npm run asbuild:optimized && ts-node tools/pack-wasm.ts" + "asbuild:untouched": "asc assembly/index.ts -b dist/wasm/untouched.wasm -t dist/wasm/untouched.wat --sourceMap --validate --debug", + "asbuild:optimized": "asc assembly/index.ts -b dist/wasm/optimized.wasm -t dist/wasm/optimized.wat --sourceMap --validate -O3", + "asbuild": "npm run asbuild:untouched && npm run asbuild:optimized && ts-node tools/pack-wasm.ts" }, "repository": { "type": "git", @@ -70,6 +70,7 @@ "msgpack-lite": "^0.1.26", "msgpack-test-js": "^1.0.0", "prettier": "^1.17.1", + "rimraf": "^2.6.3", "ts-loader": "^5.4.5", "ts-node": "^8.1.0", "typescript": "^3.4.5", diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index 62c8fc6a..c9078cf8 100644 --- a/src/wasmFunctions.ts +++ 
b/src/wasmFunctions.ts @@ -3,21 +3,22 @@ // TODO: Use TypeScript built-in type declare const WebAssembly: any; -export const WASM_DEBUG = !!(process && process.env.WASM_DEBUG === "true"); +export const WASM_DEBUG = typeof process !== "undefined" && process.env.WASM_DEBUG === "true"; -let wasmModule: any; -try { - if (WASM_DEBUG) { - wasmModule = require("../build/wasm/untouched.wasm.js").wasmModule; - } else { - wasmModule = require("../build/wasm/optimized.wasm.js").wasmModule; - } -} catch (e) { - if (WASM_DEBUG) { - console.error(e); +let { wasmModule } = (() => { + try { + if (WASM_DEBUG) { + return require("../dist/wasm/untouched.wasm.js"); + } else { + return require("../dist/wasm/optimized.wasm.js"); + } + } catch (e) { + if (WASM_DEBUG) { + console.error("WebAssembly is not supported", e); + } + return {}; } - // WebAssembly is not supported. -} +})(); function abort(filename: number, line: number, column: number): void { throw new Error(`abort called at ${filename}:${line}:${column}`); diff --git a/tools/pack-wasm.ts b/tools/pack-wasm.ts index 878dd8a4..1bb1d6fb 100644 --- a/tools/pack-wasm.ts +++ b/tools/pack-wasm.ts @@ -4,7 +4,7 @@ import fs from "fs"; import { resolve } from "path"; import base64 from "base64-js"; -const artifactDir = resolve(__dirname, "../build/wasm"); +const artifactDir = resolve(__dirname, "../dist/wasm"); for (const basename of fs.readdirSync(artifactDir)) { const file = resolve(artifactDir, basename); if (!file.endsWith(".wasm")) { From 7694547cb4c943c7fc41d8c81fc31ecdaae9aa26 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月16日 17:29:46 +0900 Subject: [PATCH 10/27] assumes process.env is always available --- src/wasmFunctions.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index c9078cf8..5a8d226b 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -3,7 +3,7 @@ // TODO: Use TypeScript built-in type declare const WebAssembly: any; -export 
const WASM_DEBUG = typeof process !== "undefined" && process.env.WASM_DEBUG === "true"; +export const WASM_DEBUG = process.env.WASM_DEBUG === "true"; let { wasmModule } = (() => { try { From e36f0680c902bb338303c7dc95601a34ee9a12d1 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月16日 22:04:36 +0900 Subject: [PATCH 11/27] implement utf8CountWasm() --- assembly/index.ts | 1 + assembly/utf8CountUint16Array.ts | 43 ++++++++++++++++++++++++++++++++ src/Encoder.ts | 8 ++++-- src/utils/utf8.ts | 17 +++++++------ src/wasmFunctions.ts | 31 +++++++++++++++++++---- 5 files changed, 86 insertions(+), 14 deletions(-) create mode 100644 assembly/utf8CountUint16Array.ts diff --git a/assembly/index.ts b/assembly/index.ts index 8451e80d..d26acd3e 100644 --- a/assembly/index.ts +++ b/assembly/index.ts @@ -3,4 +3,5 @@ // memory is assumed: // [input][output] export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array"; +export { utf8CountUint16Array } from "./utf8CountUint16Array"; export { malloc, free } from "./memory"; diff --git a/assembly/utf8CountUint16Array.ts b/assembly/utf8CountUint16Array.ts new file mode 100644 index 00000000..11332bb4 --- /dev/null +++ b/assembly/utf8CountUint16Array.ts @@ -0,0 +1,43 @@ +export function utf8CountUint16Array(inputPtr: usize, strLength: usize): usize { + const u16s = sizeof(); + + let byteLength: usize = 0; + let pos: usize = inputPtr; + let end = inputPtr + strLength * u16s; + while (pos < end) { + let value = load(pos); + pos += u16s; + + if (value>= 0xd800 && value <= 0xdbff) { + // high surrogate + if (pos < strLength) { + let extra = load(pos); + if ((extra & 0xfc00) === 0xdc00) { + pos += u16s; + value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; + } + } + if (value>= 0xd800 && value <= 0xdbff) { + continue; // drop lone surrogate + } + } + + if ((value & 0xffffff80) === 0) { + // 1-byte + byteLength++; + continue; + } else if ((value & 0xfffff800) === 0) { + // 2-bytes + byteLength += 2; + } else if 
((value & 0xffff0000) === 0) { + // 3-byte + byteLength += 3; + } else if ((value & 0xffe00000) === 0) { + // 4-byte + byteLength += 4; + } else { + unreachable(); + } + } + return byteLength; +} diff --git a/src/Encoder.ts b/src/Encoder.ts index 7f7a3400..4359090b 100644 --- a/src/Encoder.ts +++ b/src/Encoder.ts @@ -118,7 +118,11 @@ export class Encoder { } encodeString(object: string) { - const byteLength = utf8Count(object); + const units = new Uint16Array(object.length); + for (let i = 0; i < object.length; i++) { + units[i] = object.charCodeAt(i); + } + const byteLength = utf8Count(units); if (byteLength < 32) { // fixstr this.writeU8(0xa0 + byteLength); @@ -139,7 +143,7 @@ export class Encoder { } this.ensureBufferSizeToWrite(byteLength); - utf8Encode(object, this.view, this.pos); + utf8Encode(units, this.view, this.pos); this.pos += byteLength; } diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index a08191d7..ba73d8f8 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -1,19 +1,22 @@ import { prettyByte } from "./prettyByte"; -import { WASM_AVAILABLE, WASM_DEBUG, utf8DecodeWasm } from "../wasmFunctions"; +import { WASM_AVAILABLE, WASM_DEBUG, utf8DecodeWasm, utf8CountWasm } from "../wasmFunctions"; const WASM_THRESHOLD = WASM_DEBUG ? 
0 : 0x100; -export function utf8Count(str: string): number { +export function utf8Count(str: Uint16Array): number { const strLength = str.length; + if (WASM_AVAILABLE && strLength> WASM_THRESHOLD) { + return utf8CountWasm(str); + } let byteLength = 0; let pos = 0; while (pos < strLength) { - let value = str.charCodeAt(pos++); + let value = str[pos++]; if (value>= 0xd800 && value <= 0xdbff) { // high surrogate if (pos < strLength) { - const extra = str.charCodeAt(pos); + const extra = str[pos]; if ((extra & 0xfc00) === 0xdc00) { ++pos; value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; @@ -44,16 +47,16 @@ export function utf8Count(str: string): number { return byteLength; } -export function utf8Encode(str: string, view: DataView, offset: number): void { +export function utf8Encode(str: Uint16Array, view: DataView, offset: number): void { const strLength = str.length; let pos = 0; while (pos < strLength) { - let value = str.charCodeAt(pos++); + let value = str[pos++]; if (value>= 0xd800 && value <= 0xdbff) { // high surrogate if (pos < strLength) { - const extra = str.charCodeAt(pos); + const extra = str[pos]; if ((extra & 0xfc00) === 0xdc00) { ++pos; value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index 5a8d226b..a8a11b38 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -3,9 +3,14 @@ // TODO: Use TypeScript built-in type declare const WebAssembly: any; -export const WASM_DEBUG = process.env.WASM_DEBUG === "true"; +const NO_WASM = process.env.NO_WASM === "true" || process.env.MSGPACK_NO_WASM === "true"; +export const WASM_DEBUG = process.env.WASM_DEBUG === "true" || process.env.MSGPACK_WASM_DEBUG === "true"; let { wasmModule } = (() => { + if (NO_WASM) { + return {}; + } + try { if (WASM_DEBUG) { return require("../dist/wasm/untouched.wasm.js"); @@ -20,7 +25,10 @@ let { wasmModule } = (() => { } })(); +export const WASM_AVAILABLE = !!wasmModule; + function abort(filename: 
number, line: number, column: number): void { + // FIXME: filename is just a number (pointer?) throw new Error(`abort called at ${filename}:${line}:${column}`); } @@ -32,15 +40,28 @@ const defaultWasmInstance = }, }); -export const WASM_AVAILABLE = !!wasmModule && process.env.NO_WASM !== "true"; - type pointer = number; -function setMemory(wasm: any, destPtr: pointer, src: Uint8Array, size: number) { +function setMemoryU8(wasm: any, destPtr: pointer, src: Uint8Array, size: number) { const destView = new Uint8Array(wasm.exports.memory.buffer, destPtr, size); destView.set(src); } +function setMemoryU16(wasm: any, destPtr: pointer, src: Uint16Array, size: number) { + const destView = new Uint16Array(wasm.exports.memory.buffer, destPtr, size); + destView.set(src); +} + +export function utf8CountWasm(units: Uint16Array, wasm = defaultWasmInstance): number { + const inputPtr: pointer = wasm.exports.malloc(units.byteLength); + try { + setMemoryU16(wasm, inputPtr, units, units.length); + return wasm.exports.utf8CountUint16Array(inputPtr, units.length); + } finally { + wasm.exports.free(inputPtr); + } +} +// A wrapper function for utf8DecodeToUint16Array() export function utf8DecodeWasm( bytes: Uint8Array, offset: number, @@ -51,7 +72,7 @@ export function utf8DecodeWasm( // in worst case, the UTF-16 array uses the same as byteLength * 2 const outputPtr: pointer = wasm.exports.malloc(byteLength * 2); try { - setMemory(wasm, inputPtr, bytes.subarray(offset, offset + byteLength), byteLength); + setMemoryU8(wasm, inputPtr, bytes.subarray(offset, offset + byteLength), byteLength); const outputArraySize = wasm.exports.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength); const codepoints = new Uint16Array(wasm.exports.memory.buffer, outputPtr, outputArraySize); From c107406b8e6dedcbcd081a3cddc8fc4e46cd4610 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月18日 21:19:17 +0900 Subject: [PATCH 12/27] implement the whole string encoder in wasm --- .eslintrc.js | 1 + 
assembly/index.ts | 1 + assembly/memoryBE.ts | 100 ++++++++++++++++++++++++++++++ assembly/utf8CountUint16Array.ts | 13 ++-- assembly/utf8EncodeUint16Array.ts | 81 ++++++++++++++++++++++++ package-lock.json | 53 +++++++++------- package.json | 8 +-- src/Decoder.ts | 13 ++-- src/Encoder.ts | 32 +++++++--- src/utils/utf8.ts | 44 ++++++------- src/wasmFunctions.ts | 73 ++++++++++++++++------ 11 files changed, 330 insertions(+), 89 deletions(-) create mode 100644 assembly/memoryBE.ts create mode 100644 assembly/utf8EncodeUint16Array.ts diff --git a/.eslintrc.js b/.eslintrc.js index 23842c50..4da1198a 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -24,6 +24,7 @@ module.exports = { // "prefer-const": "warn", // TODO: AssemblyScript has different semantics. "guard-for-in": "warn", "curly": "warn", + "no-param-reassign": "warn", "@typescript-eslint/no-unused-vars":"warn", "@typescript-eslint/array-type": ["error", "generic"], diff --git a/assembly/index.ts b/assembly/index.ts index d26acd3e..2358ad1c 100644 --- a/assembly/index.ts +++ b/assembly/index.ts @@ -4,4 +4,5 @@ // [input][output] export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array"; export { utf8CountUint16Array } from "./utf8CountUint16Array"; +export { utf8EncodeUint16Array } from "./utf8EncodeUint16Array"; export { malloc, free } from "./memory"; diff --git a/assembly/memoryBE.ts b/assembly/memoryBE.ts new file mode 100644 index 00000000..ec505036 --- /dev/null +++ b/assembly/memoryBE.ts @@ -0,0 +1,100 @@ +// load/store values in big-endian + +@inline +export function loadFload32BE(byteOffset: usize): f32 { + return reinterpret(bswap(load(byteOffset))); +} + +@inline +export function loadFloat64BE(byteOffset: usize): f64 { + return reinterpret(bswap(load(byteOffset))); +} + +@inline +export function loadInt8BE(byteOffset: usize): i8 { + return load(byteOffset); +} + +export function loadInt16BE(byteOffset: usize): i16 { + return bswap(load(byteOffset)); +} + +@inline +export function 
loadInt32BE(byteOffset: usize): i32 { + return bswap(load(byteOffset)); +} + +@inline +export function loadInt64BE(byteOffset: usize): i64 { + return bswap(load(byteOffset)); +} + +@inline +export function loadUint8BE(byteOffset: usize): u8 { + return load(byteOffset); +} + +@inline +export function loadUint16BE(byteOffset: usize): u16 { + return bswap(load(byteOffset)); +} + +@inline +export function loadUint32BE(byteOffset: usize): u32 { + return bswap(load(byteOffset)); +} + +@inline +export function loadUint64BE(byteOffset: usize): u64 { + return bswap(load(byteOffset)); +} + +@inline +export function storeFloat32BE(byteOffset: usize, value: f32): void { + store(byteOffset, bswap(reinterpret(value))); +} + +@inline +export function storeFloat64BE(byteOffset: usize, value: f64): void { + store(byteOffset, bswap(reinterpret(value))); +} + +@inline +export function storeInt8BE(byteOffset: usize, value: i8): void { + store(byteOffset, value); +} + +@inline +export function storeInt16BE(byteOffset: usize, value: i16): void { + store(byteOffset, bswap(value)); +} + +@inline +export function storeInt32BE(byteOffset: usize, value: i32): void { + store(byteOffset, bswap(value)); +} + +@inline +export function storeInt64BE(byteOffset: usize, value: i64): void { + store(byteOffset, bswap(value)); +} + +@inline +export function storeUint8BE(byteOffset: usize, value: u8): void { + store(byteOffset, value); +} + +@inline +export function storeUint16BE(byteOffset: usize, value: u16): void { + store(byteOffset, bswap(value)); +} + +@inline +export function storeUint32BE(byteOffset: usize, value: u32): void { + store(byteOffset, bswap(value)); +} + +@inline +export function storeUint64BE(byteOffset: usize, value: u64): void { + store(byteOffset, bswap(value)); +} diff --git a/assembly/utf8CountUint16Array.ts b/assembly/utf8CountUint16Array.ts index 11332bb4..dfc0673b 100644 --- a/assembly/utf8CountUint16Array.ts +++ b/assembly/utf8CountUint16Array.ts @@ -1,17 +1,20 @@ -export 
function utf8CountUint16Array(inputPtr: usize, strLength: usize): usize { +import { loadUint16BE } from "./memoryBE"; + +// inputPtr: u16* +export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize { const u16s = sizeof(); let byteLength: usize = 0; let pos: usize = inputPtr; - let end = inputPtr + strLength * u16s; + let end = inputPtr + inputLength * u16s; while (pos < end) { - let value = load(pos); + let value: u32 = loadUint16BE(pos); pos += u16s; if (value>= 0xd800 && value <= 0xdbff) { // high surrogate - if (pos < strLength) { - let extra = load(pos); + if (pos < end) { + let extra: u32 = loadUint16BE(pos); if ((extra & 0xfc00) === 0xdc00) { pos += u16s; value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; diff --git a/assembly/utf8EncodeUint16Array.ts b/assembly/utf8EncodeUint16Array.ts new file mode 100644 index 00000000..4f3ede92 --- /dev/null +++ b/assembly/utf8EncodeUint16Array.ts @@ -0,0 +1,81 @@ +import { utf8CountUint16Array } from "./utf8CountUint16Array"; +import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./memoryBE"; + +function storeStringHeader(outputPtr: usize, utf8ByteLength: usize): usize { + let ptr = outputPtr; + if (utf8ByteLength < 32) { + // fixstr + storeUint8BE(ptr++, 0xa0 + (utf8ByteLength as u8)); + } else if (utf8ByteLength < 0x100) { + // str 8 + storeUint8BE(ptr++, 0xd9); + storeUint8BE(ptr++, utf8ByteLength as u8); + } else if (utf8ByteLength < 0x10000) { + // str 16 + storeUint8BE(ptr++, 0xda); + storeUint16BE(ptr, utf8ByteLength as u16); + ptr += sizeof(); + } else if ((utf8ByteLength as u64) < 0x100000000) { + // str 32 + storeUint8BE(ptr++, 0xdb); + storeUint32BE(ptr, utf8ByteLength as u32); + ptr += sizeof(); + } else { + throw new Error(`Too long string: ${utf8ByteLength} bytes in UTF-8`); + } + return ptr; +} + +// outputPtr: u8* +// inputPtr: u16* +// It adds MessagePack str head bytes to the output +export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: 
usize, inputLength: usize): usize { + let utf8ByteLength = utf8CountUint16Array(inputPtr, inputLength); + let strHeaderOffset = storeStringHeader(outputPtr, utf8ByteLength); + + const u16s = sizeof(); + let inputOffset = inputPtr; + let inputEnd = inputPtr + inputLength * u16s; + let outputOffset = strHeaderOffset; + while (inputOffset < inputEnd) { + let value: u32 = loadUint16BE(inputOffset); + inputOffset += u16s; + if (value>= 0xd800 && value <= 0xdbff) { + // high surrogate + if (inputOffset < inputEnd) { + let extra: u32 = loadUint16BE(inputOffset); + if ((extra & 0xfc00) === 0xdc00) { + inputOffset += u16s; + value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; + } + } + if (value>= 0xd800 && value <= 0xdbff) { + continue; // drop lone surrogate + } + } + + if ((value & 0xffffff80) === 0) { + // 1-byte + store(outputOffset++, value); + continue; + } else if ((value & 0xfffff800) === 0) { + // 2-bytes + store(outputOffset++, ((value>> 6) & 0x1f) | 0xc0); + } else if ((value & 0xffff0000) === 0) { + // 3-byte + store(outputOffset++, ((value>> 12) & 0x0f) | 0xe0); + store(outputOffset++, ((value>> 6) & 0x3f) | 0x80); + } else if ((value & 0xffe00000) === 0) { + // 4-byte + store(outputOffset++, ((value>> 18) & 0x07) | 0xf0); + store(outputOffset++, ((value>> 12) & 0x3f) | 0x80); + store(outputOffset++, ((value>> 6) & 0x3f) | 0x80); + } else { + unreachable(); + } + + store(outputOffset++, (value & 0x3f) | 0x80); + } + + return outputOffset - outputPtr; +} diff --git a/package-lock.json b/package-lock.json index 491fd726..ce81cd74 100644 --- a/package-lock.json +++ b/package-lock.json @@ -52,9 +52,9 @@ "dev": true }, "@types/node": { - "version": "11.13.10", - "resolved": "https://registry.npmjs.org/@types/node/-/node-11.13.10.tgz", - "integrity": "sha512-leUNzbFTMX94TWaIKz8N15Chu55F9QSH+INKayQr5xpkasBQBRF3qQXfo3/dOnMU/dEIit+Y/SU8HyOjq++GwA==", + "version": "11.13.11", + "resolved": "https://registry.npmjs.org/@types/node/-/node-11.13.11.tgz", + 
"integrity": "sha512-blLeR+KIy26km1OU8yTLUlSyVCOvT6+wPq/77tIA+uSHHa4yYQosn+bbaJqPtWId0wjVClUtD7aXzDbZeKWqig==", "dev": true }, "@typescript-eslint/eslint-plugin": { @@ -1226,9 +1226,9 @@ "dev": true }, "core-js": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.0.1.tgz", - "integrity": "sha512-sco40rF+2KlE0ROMvydjkrVMMG1vYilP2ALoRXcYR4obqbYIuV3Bg+51GEDW+HF8n7NRA+iaA4qD0nD9lo9mew==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.1.2.tgz", + "integrity": "sha512-3poRGjbu56leCtZCZCzCgQ7GcKOflDFnjWIepaPFUsM0IXUBrne10sl3aa2Bkcz3+FjRdIxBe9dAMhIJmEnQNA==", "dev": true }, "core-util-is": { @@ -1804,9 +1804,9 @@ } }, "eslint-config-prettier": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-4.2.0.tgz", - "integrity": "sha512-y0uWc/FRfrHhpPZCYflWC8aE0KRJRY04rdZVfl8cL3sEZmOYyaBdhdlQPjKZBnuRMyLVK+JUZr7HaZFClQiH4w==", + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-4.3.0.tgz", + "integrity": "sha512-sZwhSTHVVz78+kYD3t5pCWSYEdVSBR0PXnwjDRsUs8ytIrK8PLXw+6FKp8r3Z7rx4ZszdetWlXYKOHoUrrwPlA==", "dev": true, "requires": { "get-stdin": "^6.0.0" @@ -3060,6 +3060,12 @@ "integrity": "sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==", "dev": true }, + "is-wsl": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-1.1.0.tgz", + "integrity": "sha1-HxbkqiKwTRM2tmGIpmrzxgDDpm0=", + "dev": true + }, "isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", @@ -4100,9 +4106,9 @@ "dev": true }, "neo-async": { - "version": "2.6.0", - "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.0.tgz", - "integrity": "sha512-MFh0d/Wa7vkKO3Y3LlacqAEeHK0mckVqzDieUKTT+KGxi+zIpeVsFxymkIiRpbpDziHc290Xr9A1O4Om7otoRA==", + "version": "2.6.1", + "resolved": 
"https://registry.npmjs.org/neo-async/-/neo-async-2.6.1.tgz", + "integrity": "sha512-iyam8fBuCUpWeKPGpaNMetEocMt364qkCsfL9JuhjXX6dRnguRVOfk2GZaDpPjcOKiiXCPINZC1GczQ7iTq3Zw==", "dev": true }, "next-tick": { @@ -5782,19 +5788,20 @@ } }, "terser-webpack-plugin": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.2.3.tgz", - "integrity": "sha512-GOK7q85oAb/5kE12fMuLdn2btOS9OBZn4VsecpHDywoUC/jLhSAKOiYo0ezx7ss2EXPMzyEWFoE0s1WLE+4+oA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.2.4.tgz", + "integrity": "sha512-64IiILNQlACWZLzFlpzNaG0bpQ4ytaB7fwOsbpsdIV70AfLUmIGGeuKL0YV2WmtcrURjE2aOvHD4/lrFV3Rg+Q==", "dev": true, "requires": { - "cacache": "^11.0.2", + "cacache": "^11.3.2", "find-cache-dir": "^2.0.0", + "is-wsl": "^1.1.0", "schema-utils": "^1.0.0", - "serialize-javascript": "^1.4.0", + "serialize-javascript": "^1.7.0", "source-map": "^0.6.1", - "terser": "^3.16.1", - "webpack-sources": "^1.1.0", - "worker-farm": "^1.5.2" + "terser": "^3.17.0", + "webpack-sources": "^1.3.0", + "worker-farm": "^1.7.0" } }, "text-table": { @@ -6263,9 +6270,9 @@ } }, "webpack": { - "version": "4.31.0", - "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.31.0.tgz", - "integrity": "sha512-n6RVO3X0LbbipoE62akME9K/JI7qYrwwufs20VvgNNpqUoH4860KkaxJTbGq5bgkVZF9FqyyTG/0WPLH3PVNJA==", + "version": "4.32.1", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.32.1.tgz", + "integrity": "sha512-R0S2tfWP2tZ8ZC2dwgnUVfa9LPvhGWJXjqfgIQ6jply+9ncBbt8IZ9p83uVeqsZ/s8zKA3XyepciWNHnSxxnHg==", "dev": true, "requires": { "@webassemblyjs/ast": "1.8.5", diff --git a/package.json b/package.json index 6d7a6dd0..30fea350 100644 --- a/package.json +++ b/package.json @@ -7,10 +7,11 @@ "types": "./dist/index.d.ts", "scripts": { "build": "npm publish --dry-run", - "prepare": "npm run clean && npm run asbuild && tsc -p tsconfig.dist.json && webpack", + "prepare": "npm run 
clean && npm run asbuild:production && tsc -p tsconfig.dist.json && webpack", "prepublishOnly": "TEST_DIST=true npm run test", "clean": "rimraf build dist dist.*", "test": "mocha 'test/**/*.test.ts'", + "test:wasm": "MSGPACK_WASM=force mocha 'test/**/*.test.ts'", "test:cover": "npx nyc mocha 'test/**/*.test.ts'", "test:browser": "karma start --single-run", "test:browser:firefox": "karma start --single-run --browsers FirefoxHeadless", @@ -24,9 +25,8 @@ "profile:encode": "rimraf isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "profile:decode": "rimraf isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer", "benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts", - "asbuild:untouched": "asc assembly/index.ts -b dist/wasm/untouched.wasm -t dist/wasm/untouched.wat --sourceMap --validate --debug", - "asbuild:optimized": "asc assembly/index.ts -b dist/wasm/optimized.wasm -t dist/wasm/optimized.wat --sourceMap --validate -O3", - "asbuild": "npm run asbuild:untouched && npm run asbuild:optimized && ts-node tools/pack-wasm.ts" + "asbuild:development": "asc assembly/index.ts -b dist/wasm/msgpack.wasm -t dist/wasm/msgpack.wat --sourceMap --validate --debug && ts-node tools/pack-wasm.ts", + "asbuild:production": "asc assembly/index.ts -b dist/wasm/msgpack.wasm -t dist/wasm/msgpack.wat --sourceMap --validate -O3 && ts-node tools/pack-wasm.ts" }, "repository": { "type": "git", diff --git a/src/Decoder.ts b/src/Decoder.ts index 5bd004a1..038ebd02 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -3,6 +3,7 @@ import { ExtensionCodec } from "./ExtensionCodec"; import { getInt64, getUint64 } from "./utils/int"; import { utf8Decode } from "./utils/utf8"; import { createDataView, ensureUint8Array } from "./utils/typedArrays"; +import { 
WASM_AVAILABLE, WASM_STR_THRESHOLD, utf8DecodeWasm } from "./wasmFunctions"; enum State { ARRAY, @@ -373,13 +374,17 @@ export class Decoder { }); } - decodeUtf8String(byteLength: number, headOffset: number): string { - if (this.bytes.byteLength < this.pos + headOffset + byteLength) { + decodeUtf8String(byteLength: number, headerOffset: number): string { + if (this.bytes.byteLength < this.pos + headerOffset + byteLength) { throw MORE_DATA; } - const object = utf8Decode(this.bytes, this.pos + headOffset, byteLength); - this.pos += headOffset + byteLength; + const offset = this.pos + headerOffset; + const object = + WASM_AVAILABLE && byteLength> WASM_STR_THRESHOLD + ? utf8DecodeWasm(this.bytes, offset, byteLength) + : utf8Decode(this.bytes, offset, byteLength); + this.pos += headerOffset + byteLength; return object; } diff --git a/src/Encoder.ts b/src/Encoder.ts index 4359090b..b436986b 100644 --- a/src/Encoder.ts +++ b/src/Encoder.ts @@ -3,6 +3,7 @@ import { ExtensionCodec } from "./ExtensionCodec"; import { setInt64, setUint64 } from "./utils/int"; import { ensureUint8Array } from "./utils/typedArrays"; import { ExtData } from "./ExtData"; +import { WASM_AVAILABLE, utf8EncodeWasm, WASM_STR_THRESHOLD } from "./wasmFunctions"; export const DEFAULT_MAX_DEPTH = 100; export const DEFAULT_INITIAL_BUFFER_SIZE = 1024; @@ -117,12 +118,7 @@ export class Encoder { } } - encodeString(object: string) { - const units = new Uint16Array(object.length); - for (let i = 0; i < object.length; i++) { - units[i] = object.charCodeAt(i); - } - const byteLength = utf8Count(units); + writeStringHeader(byteLength: number) { if (byteLength < 32) { // fixstr this.writeU8(0xa0 + byteLength); @@ -141,10 +137,28 @@ export class Encoder { } else { throw new Error(`Too long string: ${byteLength} bytes in UTF-8`); } + } - this.ensureBufferSizeToWrite(byteLength); - utf8Encode(units, this.view, this.pos); - this.pos += byteLength; + encodeString(object: string) { + const maxHeaderSize = 1 + 4; + const 
strLength = object.length; + + if (WASM_AVAILABLE && strLength> WASM_STR_THRESHOLD) { + // ensure max possible size + const maxSize = maxHeaderSize + strLength * 4; + this.ensureBufferSizeToWrite(maxSize); + + const output = new Uint8Array(this.view.buffer, this.view.byteOffset + this.pos); + const consumedLength = utf8EncodeWasm(object, output); + this.pos += consumedLength; + return; + } else { + const byteLength = utf8Count(object); + this.ensureBufferSizeToWrite(maxHeaderSize + byteLength); + this.writeStringHeader(byteLength); + utf8Encode(object, this.view, this.pos); + this.pos += byteLength; + } } encodeObject(object: unknown, depth: number) { diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index ba73d8f8..bd9198f9 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -1,22 +1,17 @@ import { prettyByte } from "./prettyByte"; -import { WASM_AVAILABLE, WASM_DEBUG, utf8DecodeWasm, utf8CountWasm } from "../wasmFunctions"; -const WASM_THRESHOLD = WASM_DEBUG ? 0 : 0x100; - -export function utf8Count(str: Uint16Array): number { +export function utf8Count(str: string): number { const strLength = str.length; - if (WASM_AVAILABLE && strLength> WASM_THRESHOLD) { - return utf8CountWasm(str); - } let byteLength = 0; let pos = 0; while (pos < strLength) { - let value = str[pos++]; + let value = str.charCodeAt(pos++); + if (value>= 0xd800 && value <= 0xdbff) { // high surrogate if (pos < strLength) { - const extra = str[pos]; + const extra = str.charCodeAt(pos); if ((extra & 0xfc00) === 0xdc00) { ++pos; value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; @@ -47,16 +42,16 @@ export function utf8Count(str: Uint16Array): number { return byteLength; } -export function utf8Encode(str: Uint16Array, view: DataView, offset: number): void { +export function utf8Encode(str: string, output: DataView, outputOffset: number): void { const strLength = str.length; - + let offset = outputOffset; let pos = 0; while (pos < strLength) { - let value = str[pos++]; + let value = 
str.charCodeAt(pos++); if (value>= 0xd800 && value <= 0xdbff) { // high surrogate if (pos < strLength) { - const extra = str[pos]; + const extra = str.charCodeAt(pos); if ((extra & 0xfc00) === 0xdc00) { ++pos; value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; @@ -69,33 +64,30 @@ export function utf8Encode(str: Uint16Array, view: DataView, offset: number): vo if ((value & 0xffffff80) === 0) { // 1-byte - view.setUint8(offset++, value); + output.setUint8(offset++, value); continue; } else if ((value & 0xfffff800) === 0) { // 2-bytes - view.setUint8(offset++, ((value>> 6) & 0x1f) | 0xc0); + output.setUint8(offset++, ((value>> 6) & 0x1f) | 0xc0); } else if ((value & 0xffff0000) === 0) { // 3-byte - view.setUint8(offset++, ((value>> 12) & 0x0f) | 0xe0); - view.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); + output.setUint8(offset++, ((value>> 12) & 0x0f) | 0xe0); + output.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); } else if ((value & 0xffe00000) === 0) { // 4-byte - view.setUint8(offset++, ((value>> 18) & 0x07) | 0xf0); - view.setUint8(offset++, ((value>> 12) & 0x3f) | 0x80); - view.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); + output.setUint8(offset++, ((value>> 18) & 0x07) | 0xf0); + output.setUint8(offset++, ((value>> 12) & 0x3f) | 0x80); + output.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); } else { throw new Error(`Invalid UTF-8 byte: ${prettyByte(value)} at ${pos}`); } - view.setUint8(offset++, (value & 0x3f) | 0x80); + output.setUint8(offset++, (value & 0x3f) | 0x80); } } -export function utf8Decode(bytes: Uint8Array, offset: number, byteLength: number): string { - if (WASM_AVAILABLE && byteLength> WASM_THRESHOLD) { - return utf8DecodeWasm(bytes, offset, byteLength); - } - +export function utf8Decode(bytes: Uint8Array, outputOffset: number, byteLength: number): string { + let offset = outputOffset; const out: Array = []; const end = offset + byteLength; while (offset < end) { diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts 
index a8a11b38..988b67ca 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -1,10 +1,11 @@ -/* eslint-disable no-console */ - // TODO: Use TypeScript built-in type declare const WebAssembly: any; -const NO_WASM = process.env.NO_WASM === "true" || process.env.MSGPACK_NO_WASM === "true"; -export const WASM_DEBUG = process.env.WASM_DEBUG === "true" || process.env.MSGPACK_WASM_DEBUG === "true"; +// WASM=no - disable WASM functions +// WASM=force - force to use WASM functions +const WASM: string = process.env.MSGPACK_WASM || process.env.WASM || ""; +export const NO_WASM = WASM === "no"; +export const FORCE_WASM = WASM === "force"; let { wasmModule } = (() => { if (NO_WASM) { @@ -12,14 +13,10 @@ let { wasmModule } = (() => { } try { - if (WASM_DEBUG) { - return require("../dist/wasm/untouched.wasm.js"); - } else { - return require("../dist/wasm/optimized.wasm.js"); - } + return require("../dist/wasm/msgpack.wasm.js"); } catch (e) { - if (WASM_DEBUG) { - console.error("WebAssembly is not supported", e); + if (FORCE_WASM) { + throw e; } return {}; } @@ -27,6 +24,9 @@ let { wasmModule } = (() => { export const WASM_AVAILABLE = !!wasmModule; +// A hint to use WASM ver. +export const WASM_STR_THRESHOLD = FORCE_WASM ? 0 : 0x100; + function abort(filename: number, line: number, column: number): void { // FIXME: filename is just a number (pointer?) 
throw new Error(`abort called at ${filename}:${line}:${column}`); @@ -46,18 +46,55 @@ function setMemoryU8(wasm: any, destPtr: pointer, src: Uint8Array, size: number) const destView = new Uint8Array(wasm.exports.memory.buffer, destPtr, size); destView.set(src); } -function setMemoryU16(wasm: any, destPtr: pointer, src: Uint16Array, size: number) { - const destView = new Uint16Array(wasm.exports.memory.buffer, destPtr, size); - destView.set(src); + +// for debugging purpose +export function utf8CountWasm(str: string, wasm = defaultWasmInstance): number { + const strLength = str.length; + + // prepare inputPtr + const inputLength = strLength * 2; + // u16* + const inputPtr: pointer = wasm.exports.malloc(inputLength); + const inputView = new DataView(wasm.exports.memory.buffer, inputPtr, inputLength); + for (let i = 0; i < strLength; i++) { + inputView.setUint16(i * 2, str.charCodeAt(i)); + } + + try { + return wasm.exports.utf8CountUint16Array(inputPtr, strLength); + } finally { + wasm.exports.free(inputPtr); + } } -export function utf8CountWasm(units: Uint16Array, wasm = defaultWasmInstance): number { - const inputPtr: pointer = wasm.exports.malloc(units.byteLength); +/** + * It encodes string to MessagePack str family (headByte/size + utf8 bytes). + * @returns The whole byte length including headByte/size. 
+ */ +export function utf8EncodeWasm(str: string, output: Uint8Array, wasm = defaultWasmInstance): number { + const strLength = str.length; + + // prepare inputPtr + const inputLength = strLength * 2; + // u16* + const inputPtr: pointer = wasm.exports.malloc(inputLength); + + const inputView = new DataView(wasm.exports.memory.buffer, inputPtr, inputLength); + for (let i = 0; i < strLength; i++) { + // to write u16 in big-endian + inputView.setUint16(i * 2, str.charCodeAt(i)); + } + + // u8* + const maxOutputHeaderSize = 1 + 4; // headByte + u32 + const outputPtr: pointer = wasm.exports.malloc(maxOutputHeaderSize + strLength * 4); try { - setMemoryU16(wasm, inputPtr, units, units.length); - return wasm.exports.utf8CountUint16Array(inputPtr, units.length); + const outputLength = wasm.exports.utf8EncodeUint16Array(outputPtr, inputPtr, strLength); + output.set(new Uint8Array(wasm.exports.memory.buffer, outputPtr, outputLength)); + return outputLength; } finally { wasm.exports.free(inputPtr); + wasm.exports.free(outputPtr); } } From a1dd4f448e3c07d554b5e13dcb72c5c8e89cc1a2 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 09:41:00 +0900 Subject: [PATCH 13/27] tiny optimization for utf8 encode/count --- assembly/utf8CountUint16Array.ts | 29 +++++++--- assembly/utf8EncodeUint16Array.ts | 48 ++++++++-------- package.json | 3 +- src/utils/utf8.ts | 91 ++++++++++++++++--------------- 4 files changed, 96 insertions(+), 75 deletions(-) diff --git a/assembly/utf8CountUint16Array.ts b/assembly/utf8CountUint16Array.ts index dfc0673b..044d0486 100644 --- a/assembly/utf8CountUint16Array.ts +++ b/assembly/utf8CountUint16Array.ts @@ -32,14 +32,29 @@ export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize } else if ((value & 0xfffff800) === 0) { // 2-bytes byteLength += 2; - } else if ((value & 0xffff0000) === 0) { - // 3-byte - byteLength += 3; - } else if ((value & 0xffe00000) === 0) { - // 4-byte - byteLength += 4; } else { - unreachable(); 
+ // handle surrogate pair + if (value>= 0xd800 && value <= 0xdbff) { + // high surrogate + if (pos < end) { + let extra: u32 = loadUint16BE(pos); + if ((extra & 0xfc00) === 0xdc00) { + pos += u16s; + value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; + } + } + if (value>= 0xd800 && value <= 0xdbff) { + continue; // drop lone surrogate + } + } + + if ((value & 0xffff0000) === 0) { + // 3-byte + byteLength += 3; + } else { + // 4-byte + byteLength += 4; + } } } return byteLength; diff --git a/assembly/utf8EncodeUint16Array.ts b/assembly/utf8EncodeUint16Array.ts index 4f3ede92..4d031ca4 100644 --- a/assembly/utf8EncodeUint16Array.ts +++ b/assembly/utf8EncodeUint16Array.ts @@ -40,19 +40,6 @@ export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLe while (inputOffset < inputEnd) { let value: u32 = loadUint16BE(inputOffset); inputOffset += u16s; - if (value>= 0xd800 && value <= 0xdbff) { - // high surrogate - if (inputOffset < inputEnd) { - let extra: u32 = loadUint16BE(inputOffset); - if ((extra & 0xfc00) === 0xdc00) { - inputOffset += u16s; - value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; - } - } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } - } if ((value & 0xffffff80) === 0) { // 1-byte @@ -61,17 +48,32 @@ export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLe } else if ((value & 0xfffff800) === 0) { // 2-bytes store(outputOffset++, ((value>> 6) & 0x1f) | 0xc0); - } else if ((value & 0xffff0000) === 0) { - // 3-byte - store(outputOffset++, ((value>> 12) & 0x0f) | 0xe0); - store(outputOffset++, ((value>> 6) & 0x3f) | 0x80); - } else if ((value & 0xffe00000) === 0) { - // 4-byte - store(outputOffset++, ((value>> 18) & 0x07) | 0xf0); - store(outputOffset++, ((value>> 12) & 0x3f) | 0x80); - store(outputOffset++, ((value>> 6) & 0x3f) | 0x80); } else { - unreachable(); + // handle surrogate pair + if (value>= 0xd800 && value <= 0xdbff) { + // high surrogate + if 
(inputOffset < inputEnd) { + let extra: u32 = loadUint16BE(inputOffset); + if ((extra & 0xfc00) === 0xdc00) { + inputOffset += u16s; + value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; + } + } + if (value>= 0xd800 && value <= 0xdbff) { + continue; // drop lone surrogate + } + } + + if ((value & 0xffff0000) === 0) { + // 3-byte + store(outputOffset++, ((value>> 12) & 0x0f) | 0xe0); + store(outputOffset++, ((value>> 6) & 0x3f) | 0x80); + } else { + // 4-byte + store(outputOffset++, ((value>> 18) & 0x07) | 0xf0); + store(outputOffset++, ((value>> 12) & 0x3f) | 0x80); + store(outputOffset++, ((value>> 6) & 0x3f) | 0x80); + } } store(outputOffset++, (value & 0x3f) | 0x80); diff --git a/package.json b/package.json index 30fea350..7e14c37f 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,8 @@ "prepublishOnly": "TEST_DIST=true npm run test", "clean": "rimraf build dist dist.*", "test": "mocha 'test/**/*.test.ts'", - "test:wasm": "MSGPACK_WASM=force mocha 'test/**/*.test.ts'", + "test:wasm": "npm run asbuild:development && MSGPACK_WASM=force mocha 'test/**/*.test.ts'", + "test:purejs": "MSGPACK_WASM=no mocha 'test/**/*.test.ts'", "test:cover": "npx nyc mocha 'test/**/*.test.ts'", "test:browser": "karma start --single-run", "test:browser:firefox": "karma start --single-run --browsers FirefoxHeadless", diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index bd9198f9..784901fc 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -8,20 +8,6 @@ export function utf8Count(str: string): number { while (pos < strLength) { let value = str.charCodeAt(pos++); - if (value>= 0xd800 && value <= 0xdbff) { - // high surrogate - if (pos < strLength) { - const extra = str.charCodeAt(pos); - if ((extra & 0xfc00) === 0xdc00) { - ++pos; - value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; - } - } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } - } - if ((value & 0xffffff80) === 0) { // 1-byte byteLength++; @@ -29,14 +15,29 @@ 
export function utf8Count(str: string): number { } else if ((value & 0xfffff800) === 0) { // 2-bytes byteLength += 2; - } else if ((value & 0xffff0000) === 0) { - // 3-byte - byteLength += 3; - } else if ((value & 0xffe00000) === 0) { - // 4-byte - byteLength += 4; } else { - throw new Error(`Invalid UTF-8 byte: ${prettyByte(value)} at ${pos}`); + // handle surrogate pair + if (value>= 0xd800 && value <= 0xdbff) { + // high surrogate + if (pos < strLength) { + const extra = str.charCodeAt(pos); + if ((extra & 0xfc00) === 0xdc00) { + ++pos; + value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; + } + } + if (value>= 0xd800 && value <= 0xdbff) { + continue; // FIXME: drop lone surrogate + } + } + + if ((value & 0xffff0000) === 0) { + // 3-byte + byteLength += 3; + } else { + // 4-byte + byteLength += 4; + } } } return byteLength; @@ -48,19 +49,6 @@ export function utf8Encode(str: string, output: DataView, outputOffset: number): let pos = 0; while (pos < strLength) { let value = str.charCodeAt(pos++); - if (value>= 0xd800 && value <= 0xdbff) { - // high surrogate - if (pos < strLength) { - const extra = str.charCodeAt(pos); - if ((extra & 0xfc00) === 0xdc00) { - ++pos; - value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; - } - } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } - } if ((value & 0xffffff80) === 0) { // 1-byte @@ -69,17 +57,32 @@ export function utf8Encode(str: string, output: DataView, outputOffset: number): } else if ((value & 0xfffff800) === 0) { // 2-bytes output.setUint8(offset++, ((value>> 6) & 0x1f) | 0xc0); - } else if ((value & 0xffff0000) === 0) { - // 3-byte - output.setUint8(offset++, ((value>> 12) & 0x0f) | 0xe0); - output.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); - } else if ((value & 0xffe00000) === 0) { - // 4-byte - output.setUint8(offset++, ((value>> 18) & 0x07) | 0xf0); - output.setUint8(offset++, ((value>> 12) & 0x3f) | 0x80); - output.setUint8(offset++, ((value>> 6) & 0x3f) | 
0x80); } else { - throw new Error(`Invalid UTF-8 byte: ${prettyByte(value)} at ${pos}`); + // handle surrogate pair + if (value>= 0xd800 && value <= 0xdbff) { + // high surrogate + if (pos < strLength) { + const extra = str.charCodeAt(pos); + if ((extra & 0xfc00) === 0xdc00) { + ++pos; + value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; + } + } + if (value>= 0xd800 && value <= 0xdbff) { + continue; // FIXME: drop lone surrogate + } + } + + if ((value & 0xffff0000) === 0) { + // 3-byte + output.setUint8(offset++, ((value>> 12) & 0x0f) | 0xe0); + output.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); + } else { + // 4-byte + output.setUint8(offset++, ((value>> 18) & 0x07) | 0xf0); + output.setUint8(offset++, ((value>> 12) & 0x3f) | 0x80); + output.setUint8(offset++, ((value>> 6) & 0x3f) | 0x80); + } } output.setUint8(offset++, (value & 0x3f) | 0x80); From 66f2a7807e6dd37a82dce8e8b1cddf707bf00bfe Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 11:40:47 +0900 Subject: [PATCH 14/27] simulate wasm + esm interface in pack-wasm.ts --- src/wasmFunctions.ts | 79 +++++++++++++++++--------------------------- tools/pack-wasm.ts | 21 +++++++++--- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index 988b67ca..cd0a8ac8 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -4,12 +4,15 @@ declare const WebAssembly: any; // WASM=no - disable WASM functions // WASM=force - force to use WASM functions const WASM: string = process.env.MSGPACK_WASM || process.env.WASM || ""; -export const NO_WASM = WASM === "no"; +export const NO_WASM = WASM === "never"; export const FORCE_WASM = WASM === "force"; -let { wasmModule } = (() => { +type pointer = number; + +// WM stands for WasmModule, but not the WebAssembly.Module instance but the WebAssembly.Instance.prototype.exports +const wm: any = (() => { if (NO_WASM) { - return {}; + return null; } try { @@ -18,52 +21,37 @@ let { wasmModule } = (() 
=> { if (FORCE_WASM) { throw e; } - return {}; + return null; } })(); -export const WASM_AVAILABLE = !!wasmModule; +export const WASM_AVAILABLE = !!wm; // A hint to use WASM ver. export const WASM_STR_THRESHOLD = FORCE_WASM ? 0 : 0x100; -function abort(filename: number, line: number, column: number): void { - // FIXME: filename is just a number (pointer?) - throw new Error(`abort called at ${filename}:${line}:${column}`); -} - -const defaultWasmInstance = - wasmModule && - new WebAssembly.Instance(wasmModule, { - env: { - abort, - }, - }); - -type pointer = number; - -function setMemoryU8(wasm: any, destPtr: pointer, src: Uint8Array, size: number) { - const destView = new Uint8Array(wasm.exports.memory.buffer, destPtr, size); +function setMemoryU8(destPtr: pointer, src: Uint8Array, size: number) { + const destView = new Uint8Array(wm.memory.buffer, destPtr, size); destView.set(src); } // for debugging purpose -export function utf8CountWasm(str: string, wasm = defaultWasmInstance): number { +export function utf8CountWasm(str: string): number { const strLength = str.length; // prepare inputPtr const inputLength = strLength * 2; // u16* - const inputPtr: pointer = wasm.exports.malloc(inputLength); - const inputView = new DataView(wasm.exports.memory.buffer, inputPtr, inputLength); + const inputPtr: pointer = wm.malloc(inputLength); + const inputView = new DataView(wm.memory.buffer, inputPtr, inputLength); for (let i = 0; i < strLength; i++) { inputView.setUint16(i * 2, str.charCodeAt(i)); } try { - return wasm.exports.utf8CountUint16Array(inputPtr, strLength); + return wm.utf8CountUint16Array(inputPtr, strLength); } finally { - wasm.exports.free(inputPtr); + wm.free(inputPtr); } } @@ -71,15 +59,15 @@ export function utf8CountWasm(str: string, wasm = defaultWasmInstance): number { * It encodes string to MessagePack str family (headByte/size + utf8 bytes). * @returns The whole byte length including headByte/size. 
*/ -export function utf8EncodeWasm(str: string, output: Uint8Array, wasm = defaultWasmInstance): number { +export function utf8EncodeWasm(str: string, output: Uint8Array): number { const strLength = str.length; // prepare inputPtr const inputLength = strLength * 2; // u16* - const inputPtr: pointer = wasm.exports.malloc(inputLength); + const inputPtr: pointer = wm.malloc(inputLength); - const inputView = new DataView(wasm.exports.memory.buffer, inputPtr, inputLength); + const inputView = new DataView(wm.memory.buffer, inputPtr, inputLength); for (let i = 0; i < strLength; i++) { // to write u16 in big-endian inputView.setUint16(i * 2, str.charCodeAt(i)); @@ -87,37 +75,32 @@ export function utf8EncodeWasm(str: string, output: Uint8Array, wasm = defaultWa // u8* const maxOutputHeaderSize = 1 + 4; // headByte + u32 - const outputPtr: pointer = wasm.exports.malloc(maxOutputHeaderSize + strLength * 4); + const outputPtr: pointer = wm.malloc(maxOutputHeaderSize + strLength * 4); try { - const outputLength = wasm.exports.utf8EncodeUint16Array(outputPtr, inputPtr, strLength); - output.set(new Uint8Array(wasm.exports.memory.buffer, outputPtr, outputLength)); + const outputLength = wm.utf8EncodeUint16Array(outputPtr, inputPtr, strLength); + output.set(new Uint8Array(wm.memory.buffer, outputPtr, outputLength)); return outputLength; } finally { - wasm.exports.free(inputPtr); - wasm.exports.free(outputPtr); + wm.free(inputPtr); + wm.free(outputPtr); } } // A wrapper function for utf8DecodeToUint16Array() -export function utf8DecodeWasm( - bytes: Uint8Array, - offset: number, - byteLength: number, - wasm = defaultWasmInstance, -): string { - const inputPtr: pointer = wasm.exports.malloc(byteLength); +export function utf8DecodeWasm(bytes: Uint8Array, offset: number, byteLength: number): string { + const inputPtr: pointer = wm.malloc(byteLength); // in worst case, the UTF-16 array uses the same as byteLength * 2 - const outputPtr: pointer = wasm.exports.malloc(byteLength * 2); + 
const outputPtr: pointer = wm.malloc(byteLength * 2); try { - setMemoryU8(wasm, inputPtr, bytes.subarray(offset, offset + byteLength), byteLength); + setMemoryU8(inputPtr, bytes.subarray(offset, offset + byteLength), byteLength); - const outputArraySize = wasm.exports.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength); - const codepoints = new Uint16Array(wasm.exports.memory.buffer, outputPtr, outputArraySize); + const outputArraySize = wm.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength); + const codepoints = new Uint16Array(wm.memory.buffer, outputPtr, outputArraySize); // FIXME: split codepoints if it is too long (the maximum size depends on the JS engine, though). return String.fromCharCode.apply(String, codepoints as any); } finally { - wasm.exports.free(inputPtr); - wasm.exports.free(outputPtr); + wm.free(inputPtr); + wm.free(outputPtr); } } diff --git a/tools/pack-wasm.ts b/tools/pack-wasm.ts index 1bb1d6fb..58b5ab2d 100644 --- a/tools/pack-wasm.ts +++ b/tools/pack-wasm.ts @@ -15,11 +15,24 @@ for (const basename of fs.readdirSync(artifactDir)) { fs.writeFileSync( `${file}.js`, `// generated from ${basename} +"use strict"; + var base64 = require("base64-js"); -module.exports.wasmModule = new WebAssembly.Module( - base64.toByteArray( - ${JSON.stringify(base64.fromByteArray(blob))} -)); + +// synchronous instantiation +var wasmModule = new WebAssembly.Module( + base64.toByteArray(${JSON.stringify(base64.fromByteArray(blob))}) +); +var wasmInstance = new WebAssembly.Instance(wasmModule, { + env: { + abort: function (filename, line, column) { + // FIXME: filename is just a number (pointer?) 
+ throw new Error(\`abort called at \${filename}:\${line}:\${column}\`); + }, + }, +}); + +module.exports = wasmInstance.exports; `, ); } From 0d6b2f4a676df6df9a0b08deaf3bf16f4941bfe5 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 11:41:10 +0900 Subject: [PATCH 15/27] benchmark tweaks --- benchmark/string.ts | 52 ++++++++++++++++++++++++++++++++++----------- package.json | 4 ++-- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/benchmark/string.ts b/benchmark/string.ts index fa1e4aa1..b801394b 100644 --- a/benchmark/string.ts +++ b/benchmark/string.ts @@ -1,22 +1,50 @@ /* eslint-disable no-console */ import { encode, decode } from "../src"; -const data = "Hello, 🌏\n".repeat(10000); +const ascii = "A".repeat(40000); +const emoji = "🌏".repeat(20000); -// warm up -const encoded = encode(data); -decode(encoded); +{ + // warm up ascii + const data = ascii; + const encoded = encode(data); + decode(encoded); + console.log(`encode / decode ascii data.length=${data.length} encoded.byteLength=${encoded.byteLength}`); + + // run -// run + console.time("encode ascii"); + for (let i = 0; i < 1000; i++) { + encode(data); + } + console.timeEnd("encode ascii"); -console.time("encode"); -for (let i = 0; i < 1000; i++) { - encode(data); + console.time("decode ascii"); + for (let i = 0; i < 1000; i++) { + decode(encoded); + } + console.timeEnd("decode ascii"); } -console.timeEnd("encode"); -console.time("decode"); -for (let i = 0; i < 1000; i++) { +{ + // warm up emoji + const data = emoji; + const encoded = encode(data); decode(encoded); + + console.log(`encode / decode emoji data.length=${data.length} encoded.byteLength=${encoded.byteLength}`); + + // run + + console.time("encode emoji"); + for (let i = 0; i < 1000; i++) { + encode(data); + } + console.timeEnd("encode emoji"); + + console.time("decode emoji"); + for (let i = 0; i < 1000; i++) { + decode(encoded); + } + console.timeEnd("decode emoji"); } -console.timeEnd("decode"); diff --git 
a/package.json b/package.json index 7e14c37f..b13528db 100644 --- a/package.json +++ b/package.json @@ -11,8 +11,8 @@ "prepublishOnly": "TEST_DIST=true npm run test", "clean": "rimraf build dist dist.*", "test": "mocha 'test/**/*.test.ts'", - "test:wasm": "npm run asbuild:development && MSGPACK_WASM=force mocha 'test/**/*.test.ts'", - "test:purejs": "MSGPACK_WASM=no mocha 'test/**/*.test.ts'", + "test:wasm": "npm run asbuild:production && MSGPACK_WASM=force mocha 'test/**/*.test.ts'", + "test:purejs": "MSGPACK_WASM=never mocha 'test/**/*.test.ts'", "test:cover": "npx nyc mocha 'test/**/*.test.ts'", "test:browser": "karma start --single-run", "test:browser:firefox": "karma start --single-run --browsers FirefoxHeadless", From 1525610193c5607dc89a4e26a6c98ae58aadc3ec Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 11:58:22 +0900 Subject: [PATCH 16/27] tweaks for WASM performance --- assembly/index.ts | 5 ----- src/wasmFunctions.ts | 40 +++++++++------------------------------- 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/assembly/index.ts b/assembly/index.ts index 2358ad1c..ef3a43a2 100644 --- a/assembly/index.ts +++ b/assembly/index.ts @@ -1,8 +1,3 @@ -// The entry file of your WebAssembly module. - -// memory is assumed: -// [input][output] export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array"; -export { utf8CountUint16Array } from "./utf8CountUint16Array"; export { utf8EncodeUint16Array } from "./utf8EncodeUint16Array"; export { malloc, free } from "./memory"; diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index cd0a8ac8..aaa30aba 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -27,32 +27,19 @@ const wm: any = (() => { export const WASM_AVAILABLE = !!wm; -// A hint to use WASM ver. -export const WASM_STR_THRESHOLD = FORCE_WASM ? 0 : 0x100; +// A hint of when to use WASM ver. +export const WASM_STR_THRESHOLD = FORCE_WASM ? 
0 : 1024; function setMemoryU8(destPtr: pointer, src: Uint8Array, size: number) { const destView = new Uint8Array(wm.memory.buffer, destPtr, size); destView.set(src); } -// for debugging purpose -export function utf8CountWasm(str: string): number { - const strLength = str.length; - - // prepare inputPtr - const inputLength = strLength * 2; - // u16* - const inputPtr: pointer = wm.malloc(inputLength); - const inputView = new DataView(wm.memory.buffer, inputPtr, inputLength); +function setMemoryStr(destPtr: pointer, destByteLength: number, str: string, strLength: number) { + const inputView = new DataView(wm.memory.buffer, destPtr, destByteLength); for (let i = 0; i < strLength; i++) { inputView.setUint16(i * 2, str.charCodeAt(i)); } - - try { - return wm.utf8CountUint16Array(inputPtr, strLength); - } finally { - wm.free(inputPtr); - } } /** @@ -61,27 +48,18 @@ export function utf8CountWasm(str: string): number { */ export function utf8EncodeWasm(str: string, output: Uint8Array): number { const strLength = str.length; + const inputByteLength = strLength * 2; + const inputU16BePtr: pointer = wm.malloc(inputByteLength); + setMemoryStr(inputU16BePtr, inputByteLength, str, strLength); - // prepare inputPtr - const inputLength = strLength * 2; - // u16* - const inputPtr: pointer = wm.malloc(inputLength); - - const inputView = new DataView(wm.memory.buffer, inputPtr, inputLength); - for (let i = 0; i < strLength; i++) { - // to write u16 in big-endian - inputView.setUint16(i * 2, str.charCodeAt(i)); - } - - // u8* const maxOutputHeaderSize = 1 + 4; // headByte + u32 const outputPtr: pointer = wm.malloc(maxOutputHeaderSize + strLength * 4); try { - const outputLength = wm.utf8EncodeUint16Array(outputPtr, inputPtr, strLength); + const outputLength = wm.utf8EncodeUint16Array(outputPtr, inputU16BePtr, strLength); output.set(new Uint8Array(wm.memory.buffer, outputPtr, outputLength)); return outputLength; } finally { - wm.free(inputPtr); + wm.free(inputU16BePtr); 
wm.free(outputPtr); } } From ede703d5cbf38643edb8193f85ad8527ebd4b61c Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 12:17:32 +0900 Subject: [PATCH 17/27] remove dead code --- assembly/utf8CountUint16Array.ts | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/assembly/utf8CountUint16Array.ts b/assembly/utf8CountUint16Array.ts index 044d0486..afc78621 100644 --- a/assembly/utf8CountUint16Array.ts +++ b/assembly/utf8CountUint16Array.ts @@ -11,20 +11,6 @@ export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize let value: u32 = loadUint16BE(pos); pos += u16s; - if (value>= 0xd800 && value <= 0xdbff) { - // high surrogate - if (pos < end) { - let extra: u32 = loadUint16BE(pos); - if ((extra & 0xfc00) === 0xdc00) { - pos += u16s; - value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; - } - } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } - } - if ((value & 0xffffff80) === 0) { // 1-byte byteLength++; From f3bca07051a7023010c3bcf19a9a1470e3d40fc4 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 12:17:50 +0900 Subject: [PATCH 18/27] cleanup comments --- benchmark/string.ts | 3 +++ src/wasmFunctions.ts | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmark/string.ts b/benchmark/string.ts index b801394b..e6b18708 100644 --- a/benchmark/string.ts +++ b/benchmark/string.ts @@ -1,5 +1,8 @@ /* eslint-disable no-console */ import { encode, decode } from "../src"; +import { WASM_AVAILABLE } from "../src/wasmFunctions"; + +console.log(`WASM_AVAILABLE=${WASM_AVAILABLE}`); const ascii = "A".repeat(40000); const emoji = "🌏".repeat(20000); diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index aaa30aba..7ce8b103 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -1,7 +1,4 @@ -// TODO: Use TypeScript built-in type -declare const WebAssembly: any; - -// WASM=no - disable WASM functions +// WASM=never - disable WASM functions 
// WASM=force - force to use WASM functions const WASM: string = process.env.MSGPACK_WASM || process.env.WASM || ""; export const NO_WASM = WASM === "never"; From aaf70e29c73f7674b4bc27c08d6ba9601742cbad Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 14:54:09 +0900 Subject: [PATCH 19/27] tweaks; @inline is redundant in -O3 --- assembly/{memoryBE.ts => be.ts} | 19 ------------------- assembly/utf8CountUint16Array.ts | 2 +- assembly/utf8EncodeUint16Array.ts | 2 +- 3 files changed, 2 insertions(+), 21 deletions(-) rename assembly/{memoryBE.ts => be.ts} (93%) diff --git a/assembly/memoryBE.ts b/assembly/be.ts similarity index 93% rename from assembly/memoryBE.ts rename to assembly/be.ts index ec505036..dcdde080 100644 --- a/assembly/memoryBE.ts +++ b/assembly/be.ts @@ -1,16 +1,13 @@ // load/store values in big-endian -@inline export function loadFload32BE(byteOffset: usize): f32 { return reinterpret(bswap(load(byteOffset))); } -@inline export function loadFloat64BE(byteOffset: usize): f64 { return reinterpret(bswap(load(byteOffset))); } -@inline export function loadInt8BE(byteOffset: usize): i8 { return load(byteOffset); } @@ -19,82 +16,66 @@ export function loadInt16BE(byteOffset: usize): i16 { return bswap(load(byteOffset)); } -@inline export function loadInt32BE(byteOffset: usize): i32 { return bswap(load(byteOffset)); } -@inline export function loadInt64BE(byteOffset: usize): i64 { return bswap(load(byteOffset)); } -@inline export function loadUint8BE(byteOffset: usize): u8 { return load(byteOffset); } -@inline export function loadUint16BE(byteOffset: usize): u16 { return bswap(load(byteOffset)); } -@inline export function loadUint32BE(byteOffset: usize): u32 { return bswap(load(byteOffset)); } -@inline export function loadUint64BE(byteOffset: usize): u64 { return bswap(load(byteOffset)); } -@inline export function storeFloat32BE(byteOffset: usize, value: f32): void { store(byteOffset, bswap(reinterpret(value))); } -@inline export function 
storeFloat64BE(byteOffset: usize, value: f64): void { store(byteOffset, bswap(reinterpret(value))); } -@inline export function storeInt8BE(byteOffset: usize, value: i8): void { store(byteOffset, value); } -@inline export function storeInt16BE(byteOffset: usize, value: i16): void { store(byteOffset, bswap(value)); } -@inline export function storeInt32BE(byteOffset: usize, value: i32): void { store(byteOffset, bswap(value)); } -@inline export function storeInt64BE(byteOffset: usize, value: i64): void { store(byteOffset, bswap(value)); } -@inline export function storeUint8BE(byteOffset: usize, value: u8): void { store(byteOffset, value); } -@inline export function storeUint16BE(byteOffset: usize, value: u16): void { store(byteOffset, bswap(value)); } -@inline export function storeUint32BE(byteOffset: usize, value: u32): void { store(byteOffset, bswap(value)); } -@inline export function storeUint64BE(byteOffset: usize, value: u64): void { store(byteOffset, bswap(value)); } diff --git a/assembly/utf8CountUint16Array.ts b/assembly/utf8CountUint16Array.ts index afc78621..44259bfc 100644 --- a/assembly/utf8CountUint16Array.ts +++ b/assembly/utf8CountUint16Array.ts @@ -1,4 +1,4 @@ -import { loadUint16BE } from "./memoryBE"; +import { loadUint16BE } from "./be"; // inputPtr: u16* export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize { diff --git a/assembly/utf8EncodeUint16Array.ts b/assembly/utf8EncodeUint16Array.ts index 4d031ca4..0b905835 100644 --- a/assembly/utf8EncodeUint16Array.ts +++ b/assembly/utf8EncodeUint16Array.ts @@ -1,5 +1,5 @@ import { utf8CountUint16Array } from "./utf8CountUint16Array"; -import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./memoryBE"; +import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./be"; function storeStringHeader(outputPtr: usize, utf8ByteLength: usize): usize { let ptr = outputPtr; From 8be8c5b3b585f43d403b3ceb5fa4799ba9bd6e25 Mon Sep 17 00:00:00 2001 From: "FUJI 
Goro (gfx)" Date: 2019年5月23日 22:28:42 +0900 Subject: [PATCH 20/27] do not include wasm in the default bundle js because it increases the size of bundle +5KiB (+base64-js dep) --- karma.conf.ts | 9 +++++-- package.json | 1 + tools/pack-wasm.ts | 2 +- tsconfig.dist.webpack.json | 3 +-- webpack.config.js | 53 +++++++++++++++++++++++++++++++------- 5 files changed, 54 insertions(+), 14 deletions(-) diff --git a/karma.conf.ts b/karma.conf.ts index 10d5cf87..1ecfd46e 100644 --- a/karma.conf.ts +++ b/karma.conf.ts @@ -1,6 +1,7 @@ import { sauceLabs, sauceLaunchers } from "./sauceLabs"; const webpackConfig = require("./webpack.config.js"); +const webpack = require("webpack"); export default function configure(config: any) { config.set({ @@ -61,12 +62,16 @@ export default function configure(config: any) { }, ], }, + plugins: [ + new webpack.DefinePlugin({ + "process.env.MSGPACK_WASM": JSON.stringify(process.env.MSGPACK_WASM), + }), + ], optimization: { minimize: false, }, performance: { - maxEntrypointSize: 50 * 1024 ** 2, - maxAssetSize: 50 * 1024 ** 2, + hints: false, }, devtool: "inline-source-map", }, diff --git a/package.json b/package.json index b13528db..c7457fd6 100644 --- a/package.json +++ b/package.json @@ -67,6 +67,7 @@ "karma-sauce-launcher": "^2.0.2", "karma-sourcemap-loader": "^0.3.7", "karma-webpack": "^3.0.5", + "lodash": "^4.17.11", "mocha": "^6.1.4", "msgpack-lite": "^0.1.26", "msgpack-test-js": "^1.0.0", diff --git a/tools/pack-wasm.ts b/tools/pack-wasm.ts index 58b5ab2d..36c182c9 100644 --- a/tools/pack-wasm.ts +++ b/tools/pack-wasm.ts @@ -27,7 +27,7 @@ var wasmInstance = new WebAssembly.Instance(wasmModule, { env: { abort: function (filename, line, column) { // FIXME: filename is just a number (pointer?) 
- throw new Error(\`abort called at \${filename}:\${line}:\${column}\`); + throw new Error("abort called at " + filename + ":" + line + ":" + column); }, }, }); diff --git a/tsconfig.dist.webpack.json b/tsconfig.dist.webpack.json index b6cae97d..8dd32039 100644 --- a/tsconfig.dist.webpack.json +++ b/tsconfig.dist.webpack.json @@ -6,8 +6,7 @@ "outDir": "./build/es5", "declaration": false, "downlevelIteration": true, - "noEmitOnError": true, - "incremental": false, + "noEmitOnError": true }, "include": ["src/**/*.ts"] } diff --git a/webpack.config.js b/webpack.config.js index b3b8f439..df3b698f 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -1,14 +1,16 @@ "use strict"; const path = require("path"); +const webpack = require("webpack"); const { CheckEsVersionPlugin } = require("@bitjourney/check-es-version-webpack-plugin"); -module.exports = { +const _ = require("lodash"); + +const config = { mode: "production", entry: "./src/index.ts", output: { path: path.resolve(__dirname, "dist.es5"), - filename: "msgpack.js", libraryTarget: "commonjs", }, resolve: { @@ -26,7 +28,17 @@ module.exports = { ], }, - // We use webpack just to reduce filesystem accesses + plugins: [ + new CheckEsVersionPlugin({ + esVersion: 5, // for IE11 support + }), + ], + externals: { + "base64-js": { + commonjs: "base64-js", + }, + }, + optimization: { noEmitOnErrors: true, minimize: false, @@ -36,11 +48,34 @@ module.exports = { // https://webpack.js.org/configuration/node/ node: false, - plugins: [ - new CheckEsVersionPlugin({ - esVersion: 5, // for IE11 support - }), - ], - devtool: "source-map", }; + +module.exports = [ + // default bundle does not include wasm + ((config) => { + config.output.filename = "msgpack.js"; + config.plugins.push( + new webpack.DefinePlugin({ + // The default bundle does not include WASM + "process.env.MSGPACK_WASM": JSON.stringify("never"), + "process.env.WASM": JSON.stringify(null), + }), + new webpack.IgnorePlugin(/\.\/dist\/wasm\/msgpack\.wasm\.js$/), + 
); + return config; + })(_.cloneDeep(config)), + + // +wasm + ((config) => { + config.output.filename = "msgpack+wasm.js"; + config.plugins.push( + new webpack.DefinePlugin({ + // The +wasm bundle includes WASM (neither forced nor disabled) + "process.env.MSGPACK_WASM": JSON.stringify(null), + "process.env.WASM": JSON.stringify(null), + }), + ); + return config; + })(_.cloneDeep(config)), +]; From 9a3c7bf22d5b37b31c5d2e4636644b495d356843 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 23:05:40 +0900 Subject: [PATCH 21/27] fix large string decode issues --- src/utils/utf8.ts | 22 +++++++++++++++++++--- src/wasmFunctions.ts | 8 ++++---- test/msgpack-test-suite.test.ts | 5 +++-- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index 784901fc..46a1ae39 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -1,5 +1,3 @@ -import { prettyByte } from "./prettyByte"; - export function utf8Count(str: string): number { const strLength = str.length; @@ -89,6 +87,23 @@ export function utf8Encode(str: string, output: DataView, outputOffset: number): } } +const CHUNK_SIZE = 0x10_000; + +export function safeStringFromCharCode(units: Array | Uint16Array) { + if (units.length <= CHUNK_SIZE) { + // `String.fromCharCode.apply()` is faster than `String.fromCharCode(...units)` + // in case `units` is a typed array + return String.fromCharCode.apply(String, units as any); + } + + let result = ""; + for (let i = 0; i < units.length; i++) { + const chunk = units.slice(i * CHUNK_SIZE, (i + 1) * CHUNK_SIZE); + result += String.fromCharCode.apply(String, chunk as any); + } + return result; +} + export function utf8Decode(bytes: Uint8Array, outputOffset: number, byteLength: number): string { let offset = outputOffset; const out: Array = []; @@ -123,5 +138,6 @@ export function utf8Decode(bytes: Uint8Array, outputOffset: number, byteLength: out.push(byte1); } } - return String.fromCharCode(...out); + + return safeStringFromCharCode(out); } 
diff --git a/src/wasmFunctions.ts b/src/wasmFunctions.ts index 7ce8b103..5a3c44a1 100644 --- a/src/wasmFunctions.ts +++ b/src/wasmFunctions.ts @@ -1,3 +1,5 @@ +import { safeStringFromCharCode } from "./utils/utf8"; + // WASM=never - disable WASM functions // WASM=force - force to use WASM functions const WASM: string = process.env.MSGPACK_WASM || process.env.WASM || ""; @@ -70,10 +72,8 @@ export function utf8DecodeWasm(bytes: Uint8Array, offset: number, byteLength: nu setMemoryU8(inputPtr, bytes.subarray(offset, offset + byteLength), byteLength); const outputArraySize = wm.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength); - const codepoints = new Uint16Array(wm.memory.buffer, outputPtr, outputArraySize); - - // FIXME: split codepoints if it is too long (the maximum size depends on the JS engine, though). - return String.fromCharCode.apply(String, codepoints as any); + const units = new Uint16Array(wm.memory.buffer, outputPtr, outputArraySize); + return safeStringFromCharCode(units); } finally { wm.free(inputPtr); wm.free(outputPtr); diff --git a/test/msgpack-test-suite.test.ts b/test/msgpack-test-suite.test.ts index 3a4af036..fc9dd086 100644 --- a/test/msgpack-test-suite.test.ts +++ b/test/msgpack-test-suite.test.ts @@ -91,8 +91,9 @@ describe("msgpack-test-suite", () => { FLOAT64_POSITIVE_INF: Number.POSITIVE_INFINITY, FLOAT64_NEGATIVE_INF: Number.NEGATIVE_INFINITY, FLOAT64_NAN: Number.NaN, - STR16: "x".repeat(0x100), - STR32: "x".repeat(0x10000), + STR16: "a".repeat(0x100), + STR32: "b".repeat(0x10_000), + STR32LARGE: "c".repeat(0x100_000), // may cause "RangeError: Maximum call stack size exceeded" in simple implelementions BIN16: new Uint8Array(0x100).fill(0xff), BIN32: new Uint8Array(0x10000).fill(0xff), ARRAY16: new Array(0x100).fill(true), From 12fecca0691f039777d0455123e4623b7ab6f592 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月23日 23:49:26 +0900 Subject: [PATCH 22/27] fix karma config because webpack.config.js now returns array of 
configs --- karma.conf.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/karma.conf.ts b/karma.conf.ts index 1ecfd46e..f9e5718d 100644 --- a/karma.conf.ts +++ b/karma.conf.ts @@ -1,6 +1,5 @@ import { sauceLabs, sauceLaunchers } from "./sauceLabs"; -const webpackConfig = require("./webpack.config.js"); const webpack = require("webpack"); export default function configure(config: any) { @@ -47,7 +46,7 @@ export default function configure(config: any) { util: false, }, resolve: { - ...webpackConfig.resolve, + extensions: [".ts", ".tsx", ".mjs", ".js", ".json", ".wasm"], }, module: { rules: [ From c3c62bda8c7524935f47d529d91c70ac517340f8 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月24日 00:02:21 +0900 Subject: [PATCH 23/27] add test for broken code unit (e.g. \xff) --- test/msgpack-test-suite.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/test/msgpack-test-suite.test.ts b/test/msgpack-test-suite.test.ts index fc9dd086..5613fe58 100644 --- a/test/msgpack-test-suite.test.ts +++ b/test/msgpack-test-suite.test.ts @@ -94,6 +94,7 @@ describe("msgpack-test-suite", () => { STR16: "a".repeat(0x100), STR32: "b".repeat(0x10_000), STR32LARGE: "c".repeat(0x100_000), // may cause "RangeError: Maximum call stack size exceeded" in simple implelementions + STR_BROKEN_FF: "\xff", BIN16: new Uint8Array(0x100).fill(0xff), BIN32: new Uint8Array(0x10000).fill(0xff), ARRAY16: new Array(0x100).fill(true), From 5e631cc7caa8af8662c6b5ed7be5310faaca78f1 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月24日 00:21:03 +0900 Subject: [PATCH 24/27] do not drop lone surrogate --- assembly/utf8CountUint16Array.ts | 3 --- assembly/utf8EncodeUint16Array.ts | 3 --- src/utils/utf8.ts | 6 ------ test/msgpack-test-suite.test.ts | 1 + 4 files changed, 1 insertion(+), 12 deletions(-) diff --git a/assembly/utf8CountUint16Array.ts b/assembly/utf8CountUint16Array.ts index 44259bfc..cd23bb7b 100644 --- a/assembly/utf8CountUint16Array.ts +++ 
b/assembly/utf8CountUint16Array.ts @@ -29,9 +29,6 @@ export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; } } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } } if ((value & 0xffff0000) === 0) { diff --git a/assembly/utf8EncodeUint16Array.ts b/assembly/utf8EncodeUint16Array.ts index 0b905835..195991fa 100644 --- a/assembly/utf8EncodeUint16Array.ts +++ b/assembly/utf8EncodeUint16Array.ts @@ -59,9 +59,6 @@ export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLe value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; } } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } } if ((value & 0xffff0000) === 0) { diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts index 46a1ae39..34f9c48e 100644 --- a/src/utils/utf8.ts +++ b/src/utils/utf8.ts @@ -24,9 +24,6 @@ export function utf8Count(str: string): number { value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; } } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // FIXME: drop lone surrogate - } } if ((value & 0xffff0000) === 0) { @@ -66,9 +63,6 @@ export function utf8Encode(str: string, output: DataView, outputOffset: number): value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; } } - if (value>= 0xd800 && value <= 0xdbff) { - continue; // FIXME: drop lone surrogate - } } if ((value & 0xffff0000) === 0) { diff --git a/test/msgpack-test-suite.test.ts b/test/msgpack-test-suite.test.ts index 5613fe58..54ca93f7 100644 --- a/test/msgpack-test-suite.test.ts +++ b/test/msgpack-test-suite.test.ts @@ -95,6 +95,7 @@ describe("msgpack-test-suite", () => { STR32: "b".repeat(0x10_000), STR32LARGE: "c".repeat(0x100_000), // may cause "RangeError: Maximum call stack size exceeded" in simple implelementions STR_BROKEN_FF: "\xff", + STR_LONE_SURROGATE: "\ud800", BIN16: new Uint8Array(0x100).fill(0xff), BIN32: new 
Uint8Array(0x10000).fill(0xff), ARRAY16: new Array(0x100).fill(true), From 4e058db641fee7148e7fa22dd9855021fba59abb Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月24日 23:41:49 +0900 Subject: [PATCH 25/27] coverage report for both test:purejs and test:wasm --- .nycrc.json | 2 +- package.json | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.nycrc.json b/.nycrc.json index 46a9fcf0..69f58e8d 100644 --- a/.nycrc.json +++ b/.nycrc.json @@ -1,7 +1,7 @@ { "include": ["src/**/*.ts"], "extension": [".ts"], - "reporter": ["text-summary", "html", "lcov"], + "reporter": [], "sourceMap": true, "instrument": true } diff --git a/package.json b/package.json index 646d531f..c72c377a 100644 --- a/package.json +++ b/package.json @@ -11,9 +11,13 @@ "prepublishOnly": "TEST_DIST=true npm run test", "clean": "rimraf build dist dist.*", "test": "mocha 'test/**/*.test.ts'", - "test:wasm": "npm run asbuild:production && MSGPACK_WASM=force mocha 'test/**/*.test.ts'", "test:purejs": "MSGPACK_WASM=never mocha 'test/**/*.test.ts'", - "test:cover": "npx nyc mocha 'test/**/*.test.ts'", + "test:wasm": "npm run asbuild:production && MSGPACK_WASM=force mocha 'test/**/*.test.ts'", + "test:cover": "npm run cover:clean && npm run test:cover:purejs && npm run test:cover:wasm && npm run cover:report", + "test:cover:purejs": "npx nyc --no-clean npm run test:purejs", + "test:cover:wasm": "npx nyc --no-clean npm run test:wasm", + "cover:clean": "rimraf .nyc_output coverage/", + "cover:report": "nyc report --reporter=lcov --reporter=text-summary --reporter=html", "test:browser": "karma start --single-run", "test:browser:firefox": "karma start --single-run --browsers FirefoxHeadless", "test:browser:chrome": "karma start --single-run --browsers ChromeHeadless", From ebe391ac4a68859fde1653d4a553263bb24f4c60 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月25日 10:50:43 +0900 Subject: [PATCH 26/27] set --forceConsistentCasingInFileNames --- tsconfig.json | 3 
++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tsconfig.json b/tsconfig.json index cae8370d..d9197a52 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -46,8 +46,9 @@ // "typeRoots": [], /* List of folders to include type definitions from. */ // "types": [], /* Type declaration files to be included in compilation. */ // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ - "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ + "esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ + "forceConsistentCasingInFileNames": true /* Source Map Options */ // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ From b8025a52499488b93317897c93596f6387aeff21 Mon Sep 17 00:00:00 2001 From: "FUJI Goro (gfx)" Date: 2019年5月25日 11:07:33 +0900 Subject: [PATCH 27/27] test: make mocha.timeout longer (5 sec. to 10 sec.) --- karma.conf.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karma.conf.ts b/karma.conf.ts index f9e5718d..f2539e68 100644 --- a/karma.conf.ts +++ b/karma.conf.ts @@ -79,7 +79,7 @@ export default function configure(config: any) { }, client: { mocha: { - timeout: 5000, + timeout: 10000, }, }, });

AltStyle によって変換されたページ (->オリジナル) /