Skip to content

Commit 29f5afa

Browse files
committed
optimize utf8count wasm
1 parent 5ff999d commit 29f5afa

File tree

3 files changed

+79
-29
lines changed

3 files changed

+79
-29
lines changed

benchmark/count-utf8.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/* eslint-disable no-console */
// Benchmark script: compares utf8CountJs against utf8CountWasm on strings of
// increasing length, using one Benchmark.Suite per input string and printing
// one result line per completed benchmark.
2+
import { utf8CountJs, WASM_AVAILABLE } from "../src/utils/utf8.ts";
3+
import { getWasmError, utf8CountWasm } from "../src/utils/utf8-wasm.ts";
4+
5+
// @ts-ignore
6+
import Benchmark from "benchmark";
7+
8+
// Describe the two implementations being compared.
9+
console.log("utf8CountJs - pure JS implementation");
10+
console.log("utf8CountWasm - WebAssembly implementation");
11+
12+
// Show wasm status
13+
console.log("=".repeat(60));
14+
console.log("WebAssembly Status:");
15+
console.log(` WASM_AVAILABLE: ${WASM_AVAILABLE}`);
16+
if (WASM_AVAILABLE) {
17+
console.log(" js-string-builtins: enabled");
18+
} else {
19+
// WASM is unavailable: print the reason reported by getWasmError()
// (presumably the error captured when the module failed to load — confirm
// in utf8-wasm.ts). Falls back to "unknown" when there is no message.
const error = getWasmError();
20+
console.log(` Error: ${error?.message || "unknown"}`);
21+
// Only show the runtime-flag hint when the message mentions js-string/builtin.
if (error?.message?.includes("js-string") || error?.message?.includes("builtin")) {
22+
console.log("\n js-string-builtins is enabled by default in Node.js 24+ (V8 13.6+).");
23+
console.log(" For older versions, run with:");
24+
console.log(" node --experimental-wasm-imported-strings node_modules/.bin/ts-node benchmark/count-utf8.ts");
25+
}
26+
}
27+
console.log("=".repeat(60));
28+
29+
// Base characters with different UTF-8 widths: "A" (1 byte), "あ" (3 bytes),
// "🌏" (4 bytes; a surrogate pair in UTF-16, so str.length grows by 2 per repeat).
for (const baseStr of ["A", "あ", "🌏"]) {
30+
// Repeat the base character n times to build inputs of increasing length.
const dataSet = [10, 30, 50, 100, 200, 500, 1000].map((n) => {
31+
return baseStr.repeat(n);
32+
});
33+
34+
for (const str of dataSet) {
35+
// The byte length shown in the heading is computed with the JS implementation.
const byteLength = utf8CountJs(str);
36+
37+
console.log(`\n## string "${baseStr}" (strLength=${str.length}, byteLength=${byteLength})\n`);
38+
39+
// A fresh suite per input string keeps each string's results grouped under its heading.
const suite = new Benchmark.Suite();
40+
41+
suite.add("utf8CountJs", () => {
42+
utf8CountJs(str);
43+
});
44+
45+
// Register the WASM case only when WASM is available.
if (WASM_AVAILABLE) {
46+
suite.add("utf8CountWasm", () => {
47+
utf8CountWasm(str);
48+
});
49+
}
50+
51+
// On every "cycle" event, print the stringified benchmark that the event targets.
suite.on("cycle", (event: any) => {
52+
console.log(String(event.target));
53+
});
54+
55+
suite.run();
56+
}
57+
}

src/utils/utf8-wasm-binary.ts

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,22 @@
22
// Source: wasm/utf8.wat
33

44
export const wasmBinary = `
5-
AGFzbQEAAAABNQhedwFgAW8Bf2ADb2QAfwF/YANkAH9/AWRvYAJvfwF/YAJ/ZAABf2ABfwFkAGADZA
6-
B/fwFvAl8DDndhc206anMtc3RyaW5nBmxlbmd0aAABDndhc206anMtc3RyaW5nEWludG9DaGFyQ29k
7-
ZUFycmF5AAIOd2FzbTpqcy1zdHJpbmcRZnJvbUNoYXJDb2RlQXJyYXkAAwMGBQEEBQYHBQMBAAEHVA
8-
YGbWVtb3J5AgAJdXRmOENvdW50AAMKdXRmOEVuY29kZQAEEXV0ZjhEZWNvZGVUb0FycmF5AAUKYWxs
9-
b2NBcnJheQAGDWFycmF5VG9TdHJpbmcABwqQBgWEAQIEfwFkACAAEAAiBEUEQEEADwsgACAE+wcAIg
10-
VBABABGgNAIAEgBE9FBEAgBSAB+w0AIgNBgAFJBH8gAkEBagUgA0GAEEkEfyACQQJqBSADQf+3A00g
11-
A0GAsANPcQR/IAFBAWohASACQQRqBSACQQNqCwsLIQIgAUEBaiEBDAELCyACC7MCAgR/AWQAIAEhAi
12-
AAIAAQACIF+wcAIgZBABABGgNAIAQgBU9FBEAgBiAE+w0AIgNBgAFJBH8gAiADOgAAIAJBAWoFIANB
13-
gBBJBH8gAiADQQZ2QcABcjoAACACQQFqIANBP3FBgAFyOgAAIAJBAmoFIANB/7cDTSADQYCwA09xBH
14-
8gAiADQQp0IAYgBEEBaiIE+w0AakGAuP8aayIDQRJ2QfABcjoAACACQQFqIANBDHZBP3FBgAFyOgAA
15-
IAJBAmogA0EGdkE/cUGAAXI6AAAgAkEDaiADQT9xQYABcjoAACACQQRqBSACIANBDHZB4AFyOgAAIA
16-
JBAWogA0EGdkE/cUGAAXI6AAAgAkECaiADQT9xQYABcjoAACACQQNqCwsLIQIgBEEBaiEEDAELCyAC
17-
IAFrC78CAQN/A0AgACACSwRAIAItAAAiBEGAAXFFBEAgASADIAT7DgAgA0EBaiEDIAJBAWohAgwCCy
18-
AEQeABcUHAAUYEQCABIAMgAkEBai0AAEE/cSAEQR9xQQZ0cvsOACADQQFqIQMgAkECaiECDAILIARB
19-
8AFxQeABRgRAIAEgAyACQQJqLQAAQT9xIARBD3FBDHQgAkEBai0AAEE/cUEGdHJy+w4AIANBAWohAy
20-
ACQQNqIQIMAgsgBEH4AXFB8AFGBEAgASADIAJBA2otAABBP3EgBEEHcUESdCACQQFqLQAAQT9xQQx0
21-
ciACQQJqLQAAQT9xQQZ0cnJBgIAEayIEQQp2QYCwA3L7DgAgASADQQFqIgMgBEH/B3FBgLgDcvsOAC
22-
ADQQFqIQMgAkEEaiECDAIFIAJBAWohAgwCCwALCyADCwcAIAD7BwALCgAgACABIAIQAgs=
5+
AGFzbQEAAAABNQhedwFgAW8Bf2ACb38Bf2ADb2QAfwF/YANkAH9/AWRvYAJ/ZAABf2ABfwFkAGADZA
6+
B/fwFvAnsEDndhc206anMtc3RyaW5nBmxlbmd0aAABDndhc206anMtc3RyaW5nCmNoYXJDb2RlQXQA
7+
Ag53YXNtOmpzLXN0cmluZxFpbnRvQ2hhckNvZGVBcnJheQADDndhc206anMtc3RyaW5nEWZyb21DaG
8+
FyQ29kZUFycmF5AAQDBgUBAgUGBwUDAQABB1QGBm1lbW9yeQIACXV0ZjhDb3VudAAECnV0ZjhFbmNv
9+
ZGUABRF1dGY4RGVjb2RlVG9BcnJheQAGCmFsbG9jQXJyYXkABw1hcnJheVRvU3RyaW5nAAgK9gUFaw
10+
EEfyAAEAAhBANAIAEgBE9FBEAgACABEAEiA0GAAUkEfyACQQFqBSADQYAQSQR/IAJBAmoFIANB/7cD
11+
TSADQYCwA09xBH8gAUEBaiEBIAJBBGoFIAJBA2oLCwshAiABQQFqIQEMAQsLIAILswICBH8BZAAgAS
12+
ECIAAgABAAIgX7BwAiBkEAEAIaA0AgBCAFT0UEQCAGIAT7DQAiA0GAAUkEfyACIAM6AAAgAkEBagUg
13+
A0GAEEkEfyACIANBBnZBwAFyOgAAIAJBAWogA0E/cUGAAXI6AAAgAkECagUgA0H/twNNIANBgLADT3
14+
EEfyACIANBCnQgBiAEQQFqIgT7DQBqQYC4/xprIgNBEnZB8AFyOgAAIAJBAWogA0EMdkE/cUGAAXI6
15+
AAAgAkECaiADQQZ2QT9xQYABcjoAACACQQNqIANBP3FBgAFyOgAAIAJBBGoFIAIgA0EMdkHgAXI6AA
16+
AgAkEBaiADQQZ2QT9xQYABcjoAACACQQJqIANBP3FBgAFyOgAAIAJBA2oLCwshAiAEQQFqIQQMAQsL
17+
IAIgAWsLvwIBA38DQCAAIAJLBEAgAi0AACIEQYABcUUEQCABIAMgBPsOACADQQFqIQMgAkEBaiECDA
18+
ILIARB4AFxQcABRgRAIAEgAyACQQFqLQAAQT9xIARBH3FBBnRy+w4AIANBAWohAyACQQJqIQIMAgsg
19+
BEHwAXFB4AFGBEAgASADIAJBAmotAABBP3EgBEEPcUEMdCACQQFqLQAAQT9xQQZ0cnL7DgAgA0EBai
20+
EDIAJBA2ohAgwCCyAEQfgBcUHwAUYEQCABIAMgAkEDai0AAEE/cSAEQQdxQRJ0IAJBAWotAABBP3FB
21+
DHRyIAJBAmotAABBP3FBBnRyckGAgARrIgRBCnZBgLADcvsOACABIANBAWoiAyAEQf8HcUGAuANy+w
22+
4AIANBAWohAyACQQRqIQIMAgUgAkEBaiECDAILAAsLIAMLBwAgAPsHAAsKACAAIAEgAhADCw==
2323
`;

wasm/utf8.wat

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
;; Import js-string builtins
1212
(import "wasm:js-string" "length"
1313
(func $str_length (param externref) (result i32)))
14+
(import "wasm:js-string" "charCodeAt"
15+
(func $str_charCodeAt (param externref i32) (result i32)))
1416
(import "wasm:js-string" "intoCharCodeArray"
1517
(func $str_into_array (param externref (ref $i16_array) i32) (result i32)))
1618
(import "wasm:js-string" "fromCharCodeArray"
@@ -20,30 +22,21 @@
2022
(memory (export "memory") 1)
2123

2224
;; Count UTF-8 byte length of a JS string
23-
;; Uses GC array to get all char codes at once
25+
;; Uses charCodeAt directly to avoid array allocation overhead
2426
(func (export "utf8Count") (param $str externref) (result i32)
2527
(local $len i32)
26-
(local $arr (ref $i16_array))
2728
(local $i i32)
2829
(local $byteLen i32)
2930
(local $code i32)
3031

3132
(local.set $len (call $str_length (local.get $str)))
3233

33-
;; Handle empty string
34-
(if (i32.eqz (local.get $len))
35-
(then (return (i32.const 0))))
36-
37-
;; Allocate array and copy string chars
38-
(local.set $arr (array.new $i16_array (i32.const 0) (local.get $len)))
39-
(drop (call $str_into_array (local.get $str) (local.get $arr) (i32.const 0)))
40-
4134
;; Count UTF-8 bytes
4235
(block $break
4336
(loop $continue
4437
(br_if $break (i32.ge_u (local.get $i) (local.get $len)))
4538

46-
(local.set $code (array.get_u $i16_array (local.get $arr) (local.get $i)))
39+
(local.set $code (call $str_charCodeAt (local.get $str) (local.get $i)))
4740

4841
;; 1-byte: 0x00-0x7F
4942
(if (i32.lt_u (local.get $code) (i32.const 0x80))

0 commit comments

Comments
 (0)