@@ -5,9 +5,11 @@ import (
55 "fmt"
66 "strconv"
77 "strings"
8+ "unsafe"
89
910 node "github.com/antchfx/xmlquery"
1011 "github.com/dop251/goja"
12+ "github.com/jf-tech/go-corelib/caches"
1113 "github.com/jf-tech/go-corelib/strs"
1214
1315 "github.com/jf-tech/omniparser/nodes"
@@ -62,20 +64,92 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac
6264 }
6365}
6466
65- func javascript (_ * transformctx.Ctx , n * node.Node , js string , args ... string ) (string , error ) {
67+ // For debugging/testing purpose so we can easily disable all the caches. But not exported. We always
68+ // want caching in production.
69+ var disableCache = false
70+ 71+ // JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used
72+ // across multiple goroutines and across different *goja.Runtime.
73+ var JSProgramCache = caches .NewLoadingCache () // per schema so won't have too many, no need to put a hard cap.
74+ // JSRuntimeCache caches *goja.Runtime. A *goja.Runtime is a javascript VM. It can *not* be shared
75+ // across multiple goroutines.
76+ var JSRuntimeCache = caches .NewLoadingCache (100 ) // per transform, plus expensive, a smaller cap.
77+ // NodeToJSONCache caches *node.Node tree to translated JSON string.
78+ var NodeToJSONCache = caches .NewLoadingCache (100 ) // per transform, plus expensive, a smaller cap.
79+ 80+ func getProgram (js string ) (* goja.Program , error ) {
81+ if disableCache {
82+ return goja .Compile ("" , js , false )
83+ }
84+ p , err := JSProgramCache .Get (js , func (interface {}) (interface {}, error ) {
85+ return goja .Compile ("" , js , false )
86+ })
87+ if err != nil {
88+ return nil , err
89+ }
90+ return p .(* goja.Program ), nil
91+ }
// ptrAddrStr renders the address held by p as a lowercase hexadecimal string
// without any prefix or padding (a nil pointer yields "0"). The result is
// used as a cache key that identifies the pointed-to object by address.
func ptrAddrStr(p unsafe.Pointer) string {
	const hexBase = 16
	addr := uintptr(p)
	return strconv.FormatUint(uint64(addr), hexBase)
}
96+ 97+ func getRuntime (ctx * transformctx.Ctx ) * goja.Runtime {
98+ if disableCache {
99+ return goja .New ()
100+ }
101+ // a VM can be reused as long as not across thread. We don't have access to
102+ // thread/goroutine id (nor do we want to use some hack to get it, see
103+ // https://golang.org/doc/faq#no_goroutine_id). Instead, we use ctx as an
104+ // indicator - omniparser runs on a single thread per transform. And ctx is
105+ // is per transform.
106+ addr := ptrAddrStr (unsafe .Pointer (ctx ))
107+ vm , _ := JSRuntimeCache .Get (addr , func (interface {}) (interface {}, error ) {
108+ return goja .New (), nil
109+ })
110+ return vm .(* goja.Runtime )
111+ }
112+ 113+ func getNodeJSON (n * node.Node ) string {
114+ if disableCache {
115+ return nodes .JSONify2 (n )
116+ }
117+ addr := ptrAddrStr (unsafe .Pointer (n ))
118+ j , _ := NodeToJSONCache .Get (addr , func (interface {}) (interface {}, error ) {
119+ return nodes .JSONify2 (n ), nil
120+ })
121+ return j .(string )
122+ }
123+ 124+ // javascriptWithContext is a custom_func that runs a javascript with optional arguments and
125+ // with current node JSON, if the context node is provided.
126+ func javascriptWithContext (ctx * transformctx.Ctx , n * node.Node , js string , args ... string ) (string , error ) {
66127 if len (args )% 2 != 0 {
67128 return "" , errors .New ("invalid number of args to 'javascript'" )
68129 }
69- vm := goja .New ()
70- vm .Set (argNameNode , nodes .JSONify2 (n ))
130+ program , err := getProgram (js )
131+ if err != nil {
132+ return "" , fmt .Errorf ("invalid javascript: %s" , err .Error ())
133+ }
134+ runtime := getRuntime (ctx )
135+ var varnames []string
136+ defer func () {
137+ for i := range varnames {
138+ runtime .Set (varnames [i ], nil )
139+ }
140+ }()
71141 for i := 0 ; i < len (args )/ 2 ; i ++ {
72- n , v , err := parseArgTypeAndValue (args [i * 2 ], args [i * 2 + 1 ])
142+ varname , val , err := parseArgTypeAndValue (args [i * 2 ], args [i * 2 + 1 ])
73143 if err != nil {
74144 return "" , err
75145 }
76- vm .Set (n , v )
146+ runtime .Set (varname , val )
147+ varnames = append (varnames , varname )
148+ }
149+ if n != nil {
150+ runtime .Set (argNameNode , getNodeJSON (n ))
77151 }
78- v , err := vm . RunString ( js )
152+ v , err := runtime . RunProgram ( program )
79153 if err != nil {
80154 return "" , err
81155 }
@@ -86,3 +160,9 @@ func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (s
86160 return v .String (), nil
87161 }
88162}
163+ 164+ // javascript is a custom_func that runs a javascript with optional arguments and without context
165+ // node JSON provided.
166+ func javascript (ctx * transformctx.Ctx , js string , args ... string ) (string , error ) {
167+ return javascriptWithContext (ctx , nil , js , args ... )
168+ }
0 commit comments