Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 71ae1cf

Browse files
authored
Add caching to custom_func javascript to boost perf (see benchmark results) (#67)
* Add caching to custom_func `javascript` to boost perf (see benchmark results) * export caches so caller can adjust their caps if needed * separate javascript into javascript and javascript_with_context to avoid unnecessary JSONify of contextual node which is pretty expensive; also address PR comment * clear global vars after javascript program is executed
1 parent 9c017ae commit 71ae1cf

File tree

10 files changed

+187
-38
lines changed

10 files changed

+187
-38
lines changed

‎cli/cmd/transformCmd.go‎

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"path/filepath"
88
"strings"
99

10+
"github.com/jf-tech/go-corelib/ios"
1011
"github.com/jf-tech/go-corelib/jsons"
1112
"github.com/jf-tech/go-corelib/strs"
1213
"github.com/spf13/cobra"
@@ -36,17 +37,8 @@ func init() {
3637
&input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)")
3738
}
3839

39-
// TODO: move to go-corelib.
40-
func fileExists(file string) bool {
41-
fi, err := os.Stat(file)
42-
if os.IsNotExist(err) {
43-
return false
44-
}
45-
return !fi.IsDir()
46-
}
47-
4840
func openFile(label string, filepath string) (io.ReadCloser, error) {
49-
if !fileExists(schema) {
41+
if !ios.FileExists(schema) {
5042
return nil, fmt.Errorf("%s file '%s' does not exist", label, filepath)
5143
}
5244
return os.Open(filepath)

‎customfuncs/.snapshots/TestDumpBuiltinCustomFuncNames‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"ifElse",
1515
"isEmpty",
1616
"javascript",
17+
"javascript_with_context",
1718
"lower",
1819
"splitIntoJsonArray",
1920
"substring",

‎customfuncs/customFuncs.go‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ var builtinPublishedCustomFuncs = map[string]CustomFuncType{
4242
"dateTimeLayoutToRFC3339": dateTimeLayoutToRFC3339,
4343
"dateTimeToEpoch": dateTimeToEpoch,
4444
"dateTimeToRFC3339": dateTimeToRFC3339,
45-
"eval": eval,
4645
"floor": floor,
4746
"ifElse": ifElse,
4847
"isEmpty": isEmpty,
4948
"javascript": javascript,
49+
"javascript_with_context": javascriptWithContext,
5050
"lower": lower,
5151
"splitIntoJsonArray": splitIntoJsonArray,
5252
"substring": substring,
@@ -59,6 +59,7 @@ var builtinHiddenBackCompatCustomFuncs = map[string]CustomFuncType{
5959
// keep these custom funcs lexically sorted
6060
"dateTimeToRfc3339": dateTimeToRFC3339, // deprecated; use dateTimeToRFC3339.
6161
"dateTimeWithLayoutToRfc3339": dateTimeLayoutToRFC3339, // deprecated; use dateTimeLayoutToRFC3339.
62+
"eval": eval, // deprecated; use javascript.
6263
"external": external, // deprecated; use "external" decl.
6364
}
6465

‎customfuncs/javascript.go‎

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import (
55
"fmt"
66
"strconv"
77
"strings"
8+
"unsafe"
89

910
node "github.com/antchfx/xmlquery"
1011
"github.com/dop251/goja"
12+
"github.com/jf-tech/go-corelib/caches"
1113
"github.com/jf-tech/go-corelib/strs"
1214

1315
"github.com/jf-tech/omniparser/nodes"
@@ -62,20 +64,92 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac
6264
}
6365
}
6466

65-
func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (string, error) {
67+
// For debugging/testing purpose so we can easily disable all the caches. But not exported. We always
68+
// want caching in production.
69+
var disableCache = false
70+
71+
// JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used
72+
// across multiple goroutines and across different *goja.Runtime.
73+
var JSProgramCache = caches.NewLoadingCache() // per schema so won't have too many, no need to put a hard cap.
74+
// JSRuntimeCache caches *goja.Runtime. A *goja.Runtime is a javascript VM. It can *not* be shared
75+
// across multiple goroutines.
76+
var JSRuntimeCache = caches.NewLoadingCache(100) // per transform, plus expensive, a smaller cap.
77+
// NodeToJSONCache caches *node.Node tree to translated JSON string.
78+
var NodeToJSONCache = caches.NewLoadingCache(100) // per transform, plus expensive, a smaller cap.
79+
80+
func getProgram(js string) (*goja.Program, error) {
81+
if disableCache {
82+
return goja.Compile("", js, false)
83+
}
84+
p, err := JSProgramCache.Get(js, func(interface{}) (interface{}, error) {
85+
return goja.Compile("", js, false)
86+
})
87+
if err != nil {
88+
return nil, err
89+
}
90+
return p.(*goja.Program), nil
91+
}
92+
93+
func ptrAddrStr(p unsafe.Pointer) string {
94+
return strconv.FormatUint(uint64(uintptr(p)), 16)
95+
}
96+
97+
func getRuntime(ctx *transformctx.Ctx) *goja.Runtime {
98+
if disableCache {
99+
return goja.New()
100+
}
101+
// a VM can be reused as long as not across thread. We don't have access to
102+
// thread/goroutine id (nor do we want to use some hack to get it, see
103+
// https://golang.org/doc/faq#no_goroutine_id). Instead, we use ctx as an
104+
// indicator - omniparser runs on a single thread per transform. And ctx is
105+
// is per transform.
106+
addr := ptrAddrStr(unsafe.Pointer(ctx))
107+
vm, _ := JSRuntimeCache.Get(addr, func(interface{}) (interface{}, error) {
108+
return goja.New(), nil
109+
})
110+
return vm.(*goja.Runtime)
111+
}
112+
113+
func getNodeJSON(n *node.Node) string {
114+
if disableCache {
115+
return nodes.JSONify2(n)
116+
}
117+
addr := ptrAddrStr(unsafe.Pointer(n))
118+
j, _ := NodeToJSONCache.Get(addr, func(interface{}) (interface{}, error) {
119+
return nodes.JSONify2(n), nil
120+
})
121+
return j.(string)
122+
}
123+
124+
// javascriptWithContext is a custom_func that runs a javascript with optional arguments and
125+
// with current node JSON, if the context node is provided.
126+
func javascriptWithContext(ctx *transformctx.Ctx, n *node.Node, js string, args ...string) (string, error) {
66127
if len(args)%2 != 0 {
67128
return "", errors.New("invalid number of args to 'javascript'")
68129
}
69-
vm := goja.New()
70-
vm.Set(argNameNode, nodes.JSONify2(n))
130+
program, err := getProgram(js)
131+
if err != nil {
132+
return "", fmt.Errorf("invalid javascript: %s", err.Error())
133+
}
134+
runtime := getRuntime(ctx)
135+
var varnames []string
136+
defer func() {
137+
for i := range varnames {
138+
runtime.Set(varnames[i], nil)
139+
}
140+
}()
71141
for i := 0; i < len(args)/2; i++ {
72-
n, v, err := parseArgTypeAndValue(args[i*2], args[i*2+1])
142+
varname, val, err := parseArgTypeAndValue(args[i*2], args[i*2+1])
73143
if err != nil {
74144
return "", err
75145
}
76-
vm.Set(n, v)
146+
runtime.Set(varname, val)
147+
varnames = append(varnames, varname)
148+
}
149+
if n != nil {
150+
runtime.Set(argNameNode, getNodeJSON(n))
77151
}
78-
v, err := vm.RunString(js)
152+
v, err := runtime.RunProgram(program)
79153
if err != nil {
80154
return "", err
81155
}
@@ -86,3 +160,9 @@ func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (s
86160
return v.String(), nil
87161
}
88162
}
163+
164+
// javascript is a custom_func that runs a javascript with optional arguments and without context
165+
// node JSON provided.
166+
func javascript(ctx *transformctx.Ctx, js string, args ...string) (string, error) {
167+
return javascriptWithContext(ctx, nil, js, args...)
168+
}

‎customfuncs/javascript_test.go‎

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"strings"
55
"testing"
66

7-
node "github.com/antchfx/xmlquery"
87
"github.com/stretchr/testify/assert"
98

109
"github.com/jf-tech/omniparser/nodes"
@@ -128,7 +127,14 @@ func TestJavascript(t *testing.T) {
128127
name: "invalid javascript",
129128
js: "var;",
130129
args: nil,
131-
err: "SyntaxError: (anonymous): Line 1:4 Unexpected token ; (and 1 more errors)",
130+
err: "invalid javascript: SyntaxError: (anonymous): Line 1:4 Unexpected token ; (and 1 more errors)",
131+
expected: "",
132+
},
133+
{
134+
name: "javascript throws",
135+
js: "throw 'failure';",
136+
args: nil,
137+
err: "failure at <eval>:1:7(1)",
132138
expected: "",
133139
},
134140
{
@@ -160,8 +166,14 @@ func TestJavascript(t *testing.T) {
160166
expected: "",
161167
},
162168
} {
163-
t.Run(test.name, func(t *testing.T) {
164-
ret, err := javascript(nil, testNode, test.js, test.args...)
169+
testFn := func(t *testing.T) {
170+
var ret string
171+
var err error
172+
if strings.Contains(test.js, "_node") {
173+
ret, err = javascriptWithContext(nil, testNode, test.js, test.args...)
174+
} else {
175+
ret, err = javascript(nil, test.js, test.args...)
176+
}
165177
if test.err != "" {
166178
assert.Error(t, err)
167179
assert.Equal(t, test.err, err.Error())
@@ -170,19 +182,34 @@ func TestJavascript(t *testing.T) {
170182
assert.NoError(t, err)
171183
assert.Equal(t, test.expected, ret)
172184
}
185+
}
186+
t.Run(test.name+" (without cache)", func(t *testing.T) {
187+
disableCache = true
188+
testFn(t)
189+
})
190+
t.Run(test.name+" (with cache)", func(t *testing.T) {
191+
disableCache = false
192+
testFn(t)
173193
})
174194
}
175195
}
176196

177-
// The following benchmarks compare 'ifElse' vs 'eval' vs 'javascript' and show that
178-
// while the flexibility and power increase from left to right, the performance
179-
// decreases, dramatically:
180-
//
181-
// BenchmarkIfElse-4 4385941 270 ns/op 133 B/op 2 allocs/op
182-
// BenchmarkEval-4 642870 1843 ns/op 576 B/op 11 allocs/op
183-
// BenchmarkJavascript-4 5566 214070 ns/op 136756 B/op 1704 allocs/op
184-
//
185-
// So use 'javascript' only when expression/logic complexity warrants the performance tradeoff.
197+
func TestJavascriptClearVarsAfterRunProgram(t *testing.T) {
198+
r, err := javascriptWithContext(nil, nil, `v1 + v2`, "v1:int", "1", "v2:int", "2")
199+
assert.NoError(t, err)
200+
assert.Equal(t, "3", r)
201+
// Note v1 should be cleared before second run.
202+
r, err = javascriptWithContext(nil, nil, `v3 + v4 + v1`, "v3:int", "10", "v4:int", "20")
203+
assert.NoError(t, err)
204+
assert.Equal(t, "30", r)
205+
}
206+
207+
// go test -bench=. -benchmem -benchtime=30s
208+
// BenchmarkIfElse-4 234978459 152 ns/op 69 B/op 1 allocs/op
209+
// BenchmarkEval-4 19715643 1871 ns/op 576 B/op 11 allocs/op
210+
// BenchmarkJavascriptWithNoCache-4 165547 218455 ns/op 136733 B/op 1704 allocs/op
211+
// BenchmarkJavascriptWithCache-4 17685051 2047 ns/op 272 B/op 15 allocs/op
212+
186213
var (
187214
benchTitles = []string{"", "Dr", "Sir"}
188215
benchNames = []string{"", "Jane", "John"}
@@ -237,9 +264,10 @@ func BenchmarkEval(b *testing.B) {
237264
}
238265
}
239266

240-
func BenchmarkJavascript(b *testing.B) {
267+
func benchmarkJavascript(b *testing.B, cache bool) {
268+
disableCache = !cache
241269
for i := 0; i < b.N; i++ {
242-
ret, err := javascript(nil, &node.Node{}, `
270+
ret, err := javascript(nil, `
243271
if (!title) {
244272
""
245273
} else if (!name) {
@@ -257,3 +285,11 @@ func BenchmarkJavascript(b *testing.B) {
257285
}
258286
}
259287
}
288+
289+
func BenchmarkJavascriptWithNoCache(b *testing.B) {
290+
benchmarkJavascript(b, false)
291+
}
292+
293+
func BenchmarkJavascriptWithCache(b *testing.B) {
294+
benchmarkJavascript(b, true)
295+
}

‎go.mod‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ require (
1212
github.com/go-chi/chi v4.1.2+incompatible
1313
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
1414
github.com/google/uuid v1.1.2
15-
github.com/jf-tech/go-corelib v0.0.3
15+
github.com/jf-tech/go-corelib v0.0.4
1616
github.com/spf13/cobra v1.0.0
1717
github.com/spf13/pflag v1.0.5 // indirect
1818
github.com/stretchr/testify v1.6.1

‎go.sum‎

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uG
6464
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
6565
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
6666
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
67-
github.com/jf-tech/go-corelib v0.0.3 h1:8Dwg5MXZ6p6mUowMJpBPAeAAISmDzUcCUQoPFjYy6IA=
68-
github.com/jf-tech/go-corelib v0.0.3/go.mod h1:0+Fejzd53JtexKE5VI8I06WiBNATLIURRJgPrv4Yysg=
67+
github.com/jf-tech/go-corelib v0.0.4 h1:XP5w5bumH/zR6/EZGzD4webeZ1BPU62xZvraAiyIqdc=
68+
github.com/jf-tech/go-corelib v0.0.4/go.mod h1:0+Fejzd53JtexKE5VI8I06WiBNATLIURRJgPrv4Yysg=
6969
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
7070
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
7171
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
@@ -116,7 +116,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
116116
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
117117
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
118118
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
119-
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
120119
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
121120
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
122121
github.com/tkuchiki/go-timezone v0.2.0 h1:yyZVHtQRVZ+wvlte5HXvSpBkR0dPYnPEIgq9qqAqltk=

‎handlers/omni/v2/transform/invokeCustomFunc_test.go‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func TestInvokeCustomFunc_Success(t *testing.T) {
7474
{Const: strs.StrPtr("-"), kind: KindConst},
7575
{
7676
CustomFunc: &CustomFuncDecl{
77-
Name: "javascript",
77+
Name: "javascript_with_context",
7878
Args: []*Decl{
7979
{Const: strs.StrPtr("var n=JSON.parse(_node); '['+n.B+'/'+n.C+']'"), kind: KindConst},
8080
},

‎samples/omniv2/json/2_multiple_objects.schema.json‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"title": { "xpath": "title" },
1515
"custom_func_demo": {
1616
"custom_func": {
17-
"name": "javascript",
17+
"name": "javascript_with_context",
1818
"args": [
1919
{ "const": "var n = JSON.parse(_node);n.author+'/'+n.title+'/'+press" },
2020
{ "const": "press:string" }, { "xpath": "../../name" }

‎samples/omniv2/json/json_test.go‎

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
package json
22

33
import (
4+
"bytes"
5+
"io/ioutil"
46
"testing"
57

68
"github.com/bradleyjkemp/cupaloy"
79
"github.com/jf-tech/go-corelib/jsons"
810

11+
"github.com/jf-tech/omniparser"
912
"github.com/jf-tech/omniparser/samples/sampleutil"
13+
"github.com/jf-tech/omniparser/transformctx"
1014
)
1115

1216
func Test1_Single_Object(t *testing.T) {
@@ -23,3 +27,39 @@ func Test3_XPathDynamic(t *testing.T) {
2327
cupaloy.SnapshotT(t, jsons.BPJ(sampleutil.SampleTestCommon(t,
2428
"./3_xpathdynamic.schema.json", "./3_xpathdynamic.input.json")))
2529
}
30+
31+
var benchSchemaFile = "./2_multiple_objects.schema.json"
32+
var benchInputFile = "./2_multiple_objects.input.json"
33+
var benchSchema omniparser.Schema
34+
var benchInput []byte
35+
36+
func init() {
37+
schema, err := ioutil.ReadFile(benchSchemaFile)
38+
if err != nil {
39+
panic(err)
40+
}
41+
benchSchema, err = omniparser.NewSchema("bench", bytes.NewReader(schema))
42+
if err != nil {
43+
panic(err)
44+
}
45+
benchInput, err = ioutil.ReadFile(benchInputFile)
46+
if err != nil {
47+
panic(err)
48+
}
49+
}
50+
51+
func Benchmark2_Multiple_Objects(b *testing.B) {
52+
for i := 0; i < b.N; i++ {
53+
transform, err := benchSchema.NewTransform(
54+
"bench", bytes.NewReader(benchInput), &transformctx.Ctx{})
55+
if err != nil {
56+
panic(err)
57+
}
58+
for transform.Next() {
59+
_, err := transform.Read()
60+
if err != nil {
61+
panic(err)
62+
}
63+
}
64+
}
65+
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /