-
Notifications
You must be signed in to change notification settings - Fork 171
Implement some functions in AssemblyScript/WebAssembly #26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
28 commits
Select commit
Hold shift + click to select a range
f2779b6
initial commit after `asinit .`
gfx 80a1bd4
implement utf8Encode in AssemblyScript (but slow)
gfx be528f3
asbuild -O3; add benchmark; USE_WASM=true
gfx d9bf5af
fix misuse of loaad<T>()
gfx a4903ec
use String.fromCharCode.apply() in WASM ver.
gfx 6c55452
re-structured wasm modules
gfx 6066940
move use of utf8DecodeWasm to utf8.ts
gfx 8a663c4
use memory allocator in wasm functions
gfx 01fd626
refactor dist structure
gfx 7694547
assumes process.env is always available
gfx e36f068
implement utf8CountWasm()
gfx c107406
implement the whole string encoder in wasm
gfx a1dd4f4
tiny optimization for utf8 encode/count
gfx 66f2a78
simulate wasm + esm interface in pack-wasm.ts
gfx 0d6b2f4
benchmark tweaks
gfx 1525610
tweaks for WASM performance
gfx ede703d
remove dead code
gfx f3bca07
cleanup comments
gfx aaf70e2
tweaks; @inline is redundant in -O3
gfx 8be8c5b
do not include wasm in the default bundle js
gfx 78ceac5
Merge remote-tracking branch 'origin/master' into assemblyscript
gfx 9a3c7bf
fix large string decode issues
gfx 12fecca
fix karma config because webpack.config.js now returns array of configs
gfx c3c62bd
add test for broken code unit (e.g. \xff)
gfx 5e631cc
do not drop lone surrogate
gfx 4e058db
coverage report for both test:purejs and test:wasm
gfx ebe391a
set --forceConsistentCasingInFileNames
gfx b8025a5
test: make mocha.timeout longer (5 sec. to 10 sec.)
gfx File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
.nycrc.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
{ | ||
"include": ["src/**/*.ts"], | ||
"extension": [".ts"], | ||
"reporter": ["text-summary", "html", "lcov"], | ||
"reporter": [], | ||
"sourceMap": true, | ||
"instrument": true | ||
} |
81 changes: 81 additions & 0 deletions
assembly/be.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// load/store values in big-endian | ||
|
||
/**
 * Reads a 32-bit float stored in big-endian byte order at `byteOffset`.
 *
 * The four bytes are loaded as a `u32`, byte-swapped, and then reinterpreted
 * bit-for-bit as an `f32` (no numeric conversion takes place).
 *
 * @param byteOffset address of the first byte of the value
 * @returns the decoded `f32`
 */
export function loadFloat32BE(byteOffset: usize): f32 {
  return reinterpret<f32>(bswap<u32>(load<u32>(byteOffset)));
}

/**
 * Misspelled original name of `loadFloat32BE`, kept so that existing
 * importers keep compiling.
 *
 * @deprecated Use `loadFloat32BE` instead.
 * @param byteOffset address of the first byte of the value
 * @returns the decoded `f32`
 */
export function loadFload32BE(byteOffset: usize): f32 {
  return loadFloat32BE(byteOffset);
}
|
||
/**
 * Reads a 64-bit float stored in big-endian byte order at `byteOffset`.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the `f64` whose bit pattern is the byte-swapped 8 bytes
 */
export function loadFloat64BE(byteOffset: usize): f64 {
  const bits = bswap<u64>(load<u64>(byteOffset));
  return reinterpret<f64>(bits);
}
|
||
/**
 * Reads a signed 8-bit integer at `byteOffset`.
 * A single byte has no byte order, so no swap is performed.
 *
 * @param byteOffset address of the byte
 * @returns the byte as `i8`
 */
export function loadInt8BE(byteOffset: usize): i8 {
  const value = load<i8>(byteOffset);
  return value;
}
|
||
/**
 * Reads a signed 16-bit integer stored in big-endian byte order.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the byte-swapped `i16`
 */
export function loadInt16BE(byteOffset: usize): i16 {
  const raw = load<i16>(byteOffset);
  return bswap<i16>(raw);
}
|
||
/**
 * Reads a signed 32-bit integer stored in big-endian byte order.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the byte-swapped `i32`
 */
export function loadInt32BE(byteOffset: usize): i32 {
  const raw = load<i32>(byteOffset);
  return bswap<i32>(raw);
}
|
||
/**
 * Reads a signed 64-bit integer stored in big-endian byte order.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the byte-swapped `i64`
 */
export function loadInt64BE(byteOffset: usize): i64 {
  const raw = load<i64>(byteOffset);
  return bswap<i64>(raw);
}
|
||
/**
 * Reads an unsigned 8-bit integer at `byteOffset`.
 * A single byte has no byte order, so no swap is performed.
 *
 * @param byteOffset address of the byte
 * @returns the byte as `u8`
 */
export function loadUint8BE(byteOffset: usize): u8 {
  const value = load<u8>(byteOffset);
  return value;
}
|
||
/**
 * Reads an unsigned 16-bit integer stored in big-endian byte order.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the byte-swapped `u16`
 */
export function loadUint16BE(byteOffset: usize): u16 {
  const raw = load<u16>(byteOffset);
  return bswap<u16>(raw);
}
|
||
/**
 * Reads an unsigned 32-bit integer stored in big-endian byte order.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the byte-swapped `u32`
 */
export function loadUint32BE(byteOffset: usize): u32 {
  const raw = load<u32>(byteOffset);
  return bswap<u32>(raw);
}
|
||
/**
 * Reads an unsigned 64-bit integer stored in big-endian byte order.
 *
 * @param byteOffset address of the first byte of the value
 * @returns the byte-swapped `u64`
 */
export function loadUint64BE(byteOffset: usize): u64 {
  const raw = load<u64>(byteOffset);
  return bswap<u64>(raw);
}
|
||
/**
 * Writes a 32-bit float in big-endian byte order at `byteOffset`.
 *
 * The float's bit pattern is reinterpreted as a `u32`, byte-swapped, and
 * stored; no numeric conversion takes place.
 *
 * @param byteOffset address of the first byte to write
 * @param value      float to store
 */
export function storeFloat32BE(byteOffset: usize, value: f32): void {
  const bits = reinterpret<u32>(value);
  store<u32>(byteOffset, bswap<u32>(bits));
}
|
||
/**
 * Writes a 64-bit float in big-endian byte order at `byteOffset`.
 *
 * The float's bit pattern is reinterpreted as a `u64`, byte-swapped, and
 * stored; no numeric conversion takes place.
 *
 * @param byteOffset address of the first byte to write
 * @param value      float to store
 */
export function storeFloat64BE(byteOffset: usize, value: f64): void {
  const bits = reinterpret<u64>(value);
  store<u64>(byteOffset, bswap<u64>(bits));
}
|
||
/**
 * Writes a signed 8-bit integer at `byteOffset`.
 * A single byte has no byte order, so the value is stored unchanged.
 *
 * @param byteOffset address of the byte to write
 * @param value      byte to store
 */
export function storeInt8BE(byteOffset: usize, value: i8): void {
  const byte = value;
  store<i8>(byteOffset, byte);
}
|
||
/**
 * Writes a signed 16-bit integer in big-endian byte order.
 *
 * @param byteOffset address of the first byte to write
 * @param value      integer to store
 */
export function storeInt16BE(byteOffset: usize, value: i16): void {
  const swapped = bswap<i16>(value);
  store<i16>(byteOffset, swapped);
}
|
||
/**
 * Writes a signed 32-bit integer in big-endian byte order.
 *
 * @param byteOffset address of the first byte to write
 * @param value      integer to store
 */
export function storeInt32BE(byteOffset: usize, value: i32): void {
  const swapped = bswap<i32>(value);
  store<i32>(byteOffset, swapped);
}
|
||
/**
 * Writes a signed 64-bit integer in big-endian byte order.
 *
 * @param byteOffset address of the first byte to write
 * @param value      integer to store
 */
export function storeInt64BE(byteOffset: usize, value: i64): void {
  const swapped = bswap<i64>(value);
  store<i64>(byteOffset, swapped);
}
|
||
/**
 * Writes an unsigned 8-bit integer at `byteOffset`.
 * A single byte has no byte order, so the value is stored unchanged.
 *
 * @param byteOffset address of the byte to write
 * @param value      byte to store
 */
export function storeUint8BE(byteOffset: usize, value: u8): void {
  const byte = value;
  store<u8>(byteOffset, byte);
}
|
||
/**
 * Writes an unsigned 16-bit integer in big-endian byte order.
 *
 * @param byteOffset address of the first byte to write
 * @param value      integer to store
 */
export function storeUint16BE(byteOffset: usize, value: u16): void {
  const swapped = bswap<u16>(value);
  store<u16>(byteOffset, swapped);
}
|
||
/**
 * Writes an unsigned 32-bit integer in big-endian byte order.
 *
 * @param byteOffset address of the first byte to write
 * @param value      integer to store
 */
export function storeUint32BE(byteOffset: usize, value: u32): void {
  const swapped = bswap<u32>(value);
  store<u32>(byteOffset, swapped);
}
|
||
/**
 * Writes an unsigned 64-bit integer in big-endian byte order.
 *
 * @param byteOffset address of the first byte to write
 * @param value      integer to store
 */
export function storeUint64BE(byteOffset: usize, value: u64): void {
  const swapped = bswap<u64>(value);
  store<u64>(byteOffset, swapped);
}
3 changes: 3 additions & 0 deletions
assembly/index.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array"; | ||
export { utf8EncodeUint16Array } from "./utf8EncodeUint16Array"; | ||
export { malloc, free } from "./memory"; |
9 changes: 9 additions & 0 deletions
assembly/memory.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import "allocator/tlsf"; | ||
|
||
/**
 * Thin exported wrapper around `memory.allocate`.
 *
 * @param size number of bytes to allocate
 * @returns the address returned by the allocator
 */
export function malloc(size: usize): usize {
  const ptr = memory.allocate(size);
  return ptr;
}
|
||
/**
 * Thin exported wrapper around `memory.free`.
 *
 * @param ptr address previously obtained from the allocator
 */
export function free(ptr: usize): void {
  const block = ptr;
  memory.free(block);
}
6 changes: 6 additions & 0 deletions
assembly/tsconfig.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"extends": "../node_modules/assemblyscript/std/assembly.json", | ||
"include": [ | ||
"./**/*.ts" | ||
] | ||
} |
44 changes: 44 additions & 0 deletions
assembly/utf8CountUint16Array.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import { loadUint16BE } from "./be"; | ||
|
||
// inputPtr: u16* | ||
/**
 * Computes how many bytes the UTF-8 encoding of a UTF-16 sequence needs.
 *
 * Code units are read with `loadUint16BE`, i.e. the input is expected to be
 * stored in big-endian byte order. A high surrogate immediately followed by a
 * low surrogate is counted as one 4-byte sequence; a surrogate without a
 * partner is counted as a 3-byte sequence.
 *
 * @param inputPtr    address of the first u16 code unit
 * @param inputLength number of u16 code units (not bytes)
 * @returns the UTF-8 byte length of the input
 */
export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize {
  const unitSize = sizeof<u16>();
  const limit = inputPtr + inputLength * unitSize;

  let total: usize = 0;
  let cursor: usize = inputPtr;

  while (cursor < limit) {
    let unit: u32 = loadUint16BE(cursor);
    cursor += unitSize;

    // U+0000..U+007F
    if (unit < 0x80) {
      total += 1;
      continue;
    }

    // U+0080..U+07FF
    if (unit < 0x800) {
      total += 2;
      continue;
    }

    // Try to merge a high surrogate with the low surrogate that follows it.
    const isHighSurrogate = unit >= 0xd800 && unit <= 0xdbff;
    if (isHighSurrogate && cursor < limit) {
      const next: u32 = loadUint16BE(cursor);
      if ((next & 0xfc00) === 0xdc00) {
        cursor += unitSize;
        unit = ((unit & 0x3ff) << 10) + (next & 0x3ff) + 0x10000;
      }
    }

    if (unit <= 0xffff) {
      // U+0800..U+FFFF, including unpaired surrogates
      total += 3;
    } else {
      // U+10000 and above
      total += 4;
    }
  }

  return total;
}
46 changes: 46 additions & 0 deletions
assembly/utf8DecodeToUint16Array.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/**
 * Decodes a run of UTF-8 bytes into UTF-16 code units.
 *
 * Code points above U+FFFF are written as a surrogate pair. A byte that is
 * not a valid UTF-8 lead byte is copied through as a single code unit.
 *
 * NOTE: continuation bytes are neither validated nor bounds-checked, so a
 * sequence truncated at the end of the input reads past `inputPtr + byteLength`.
 * The output buffer is not bounds-checked either.
 *
 * @param outputPtr  address where the u16 code units are written
 * @param inputPtr   address of the first UTF-8 byte
 * @param byteLength number of UTF-8 bytes to consume
 * @returns the number of u16 code units written at `outputPtr`
 */
export function utf8DecodeToUint16Array(outputPtr: usize, inputPtr: usize, byteLength: usize): usize {
  let inputOffset = inputPtr;
  let outputOffset = outputPtr;
  let inputOffsetEnd = inputOffset + byteLength;
  const u16s = sizeof<u16>();

  while (inputOffset < inputOffsetEnd) {
    let byte1: u16 = load<u8>(inputOffset++);
    if ((byte1 & 0x80) === 0) {
      // 1 byte
      store<u16>(outputOffset, byte1);
      outputOffset += u16s;
    } else if ((byte1 & 0xe0) === 0xc0) {
      // 2 bytes
      let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
      // FIXME: consider endians
      store<u16>(outputOffset, ((byte1 & 0x1f) << 6) | byte2);
      outputOffset += u16s;
    } else if ((byte1 & 0xf0) === 0xe0) {
      // 3 bytes: the lead byte carries 4 payload bits
      let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
      let byte3: u16 = load<u8>(inputOffset++) & 0x3f;
      store<u16>(outputOffset, ((byte1 & 0x0f) << 12) | (byte2 << 6) | byte3);
      outputOffset += u16s;
    } else if ((byte1 & 0xf8) === 0xf0) {
      // 4 bytes
      let byte2 = load<u8>(inputOffset++) & 0x3f;
      let byte3 = load<u8>(inputOffset++) & 0x3f;
      let byte4 = load<u8>(inputOffset++) & 0x3f;
      let codepoint: i32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
      if (codepoint > 0xffff) {
        // split into a surrogate pair: high half first, low half below
        codepoint -= 0x10000;
        store<u16>(outputOffset, ((codepoint >>> 10) & 0x3ff) | 0xd800);
        outputOffset += u16s;
        codepoint = 0xdc00 | (codepoint & 0x3ff);
      }
      store<u16>(outputOffset, codepoint);
      outputOffset += u16s;
    } else {
      // invalid UTF-8: pass the byte through as one code unit.
      // The cursor must advance by exactly one u16 slot; a post-increment in
      // addition to `+= u16s` would move it 3 bytes and misalign every later
      // code unit as well as the returned length.
      store<u16>(outputOffset, byte1);
      outputOffset += u16s;
    }
  }
  return (outputOffset - outputPtr) / u16s;
}
80 changes: 80 additions & 0 deletions
assembly/utf8EncodeUint16Array.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import { utf8CountUint16Array } from "./utf8CountUint16Array"; | ||
import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./be"; | ||
|
||
/**
 * Writes the MessagePack string header for a payload of `utf8ByteLength`
 * bytes and returns the address immediately after the header.
 *
 * Header forms, chosen by length:
 *   - fixstr: one byte `0xa0 + length` (length < 32)
 *   - str 8:  `0xd9` followed by a u8 length
 *   - str 16: `0xda` followed by a big-endian u16 length
 *   - str 32: `0xdb` followed by a big-endian u32 length
 *
 * @param outputPtr      address where the header starts
 * @param utf8ByteLength payload length in bytes
 * @returns the address where the payload should begin
 * @throws Error when the length does not fit in 32 bits
 */
function storeStringHeader(outputPtr: usize, utf8ByteLength: usize): usize {
  if (utf8ByteLength < 32) {
    // fixstr
    storeUint8BE(outputPtr, 0xa0 + (utf8ByteLength as u8));
    return outputPtr + 1;
  }

  if (utf8ByteLength < 0x100) {
    // str 8
    storeUint8BE(outputPtr, 0xd9);
    storeUint8BE(outputPtr + 1, utf8ByteLength as u8);
    return outputPtr + 2;
  }

  if (utf8ByteLength < 0x10000) {
    // str 16
    storeUint8BE(outputPtr, 0xda);
    storeUint16BE(outputPtr + 1, utf8ByteLength as u16);
    return outputPtr + 1 + sizeof<u16>();
  }

  if ((utf8ByteLength as u64) < 0x100000000) {
    // str 32
    storeUint8BE(outputPtr, 0xdb);
    storeUint32BE(outputPtr + 1, utf8ByteLength as u32);
    return outputPtr + 1 + sizeof<u32>();
  }

  throw new Error(`Too long string: ${utf8ByteLength} bytes in UTF-8`);
}
|
||
// outputPtr: u8* | ||
// inputPtr: u16* | ||
// It adds MessagePack str head bytes to the output | ||
/**
 * Encodes UTF-16 code units as a MessagePack string: a str header followed
 * by the UTF-8 payload.
 *
 * Code units are read with `loadUint16BE`. A high surrogate immediately
 * followed by a low surrogate becomes one 4-byte sequence; a surrogate
 * without a partner is encoded as a 3-byte sequence.
 *
 * @param outputPtr   address where the header and UTF-8 bytes are written
 * @param inputPtr    address of the first u16 code unit
 * @param inputLength number of u16 code units (not bytes)
 * @returns total bytes written, header included
 */
export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLength: usize): usize {
  const unitSize = sizeof<u16>();

  // The header needs the payload length, so measure before encoding.
  const payloadLength = utf8CountUint16Array(inputPtr, inputLength);
  let writePos = storeStringHeader(outputPtr, payloadLength);

  let readPos = inputPtr;
  const readEnd = inputPtr + inputLength * unitSize;

  while (readPos < readEnd) {
    let codePoint: u32 = loadUint16BE(readPos);
    readPos += unitSize;

    if (codePoint < 0x80) {
      // 1-byte: stored as-is, no continuation byte
      store<u8>(writePos++, codePoint);
      continue;
    }

    if (codePoint < 0x800) {
      // 2-bytes: lead byte only, the tail is written after the branches
      store<u8>(writePos++, ((codePoint >> 6) & 0x1f) | 0xc0);
    } else {
      // Merge a high surrogate with the low surrogate that follows it.
      const isHighSurrogate = codePoint >= 0xd800 && codePoint <= 0xdbff;
      if (isHighSurrogate && readPos < readEnd) {
        const low: u32 = loadUint16BE(readPos);
        if ((low & 0xfc00) === 0xdc00) {
          readPos += unitSize;
          codePoint = ((codePoint & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
        }
      }

      if (codePoint <= 0xffff) {
        // 3-byte
        store<u8>(writePos++, ((codePoint >> 12) & 0x0f) | 0xe0);
        store<u8>(writePos++, ((codePoint >> 6) & 0x3f) | 0x80);
      } else {
        // 4-byte
        store<u8>(writePos++, ((codePoint >> 18) & 0x07) | 0xf0);
        store<u8>(writePos++, ((codePoint >> 12) & 0x3f) | 0x80);
        store<u8>(writePos++, ((codePoint >> 6) & 0x3f) | 0x80);
      }
    }

    // Every multi-byte form ends with the low six bits as a continuation byte.
    store<u8>(writePos++, (codePoint & 0x3f) | 0x80);
  }

  return writePos - outputPtr;
}
53 changes: 53 additions & 0 deletions
benchmark/string.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* eslint-disable no-console */ | ||
import { encode, decode } from "../src"; | ||
import { WASM_AVAILABLE } from "../src/wasmFunctions"; | ||
|
||
console.log(`WASM_AVAILABLE=${WASM_AVAILABLE}`);

const ascii = "A".repeat(40000);
const emoji = "🌏".repeat(20000);

/** Number of timed encode / decode calls per measurement. */
const ITERATIONS = 1000;

/**
 * Warms up encode/decode once for `data`, reports the input and encoded
 * sizes, then times `ITERATIONS` encodes followed by `ITERATIONS` decodes.
 *
 * @param label name used in the log line and in the console timer labels
 * @param data  string to encode and decode
 */
function runStringBenchmark(label: string, data: string): void {
  // warm up
  const encoded = encode(data);
  decode(encoded);
  console.log(`encode / decode ${label} data.length=${data.length} encoded.byteLength=${encoded.byteLength}`);

  // run
  const encodeTimer = `encode ${label}`;
  console.time(encodeTimer);
  for (let i = 0; i < ITERATIONS; i++) {
    encode(data);
  }
  console.timeEnd(encodeTimer);

  const decodeTimer = `decode ${label}`;
  console.time(decodeTimer);
  for (let i = 0; i < ITERATIONS; i++) {
    decode(encoded);
  }
  console.timeEnd(decodeTimer);
}

runStringBenchmark("ascii", ascii);
runStringBenchmark("emoji", emoji);
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.