@@ -5,9 +5,11 @@ import (
55	"fmt" 
66	"strconv" 
77	"strings" 
8+ 	"unsafe" 
89
910	node "github.com/antchfx/xmlquery" 
1011	"github.com/dop251/goja" 
12+ 	"github.com/jf-tech/go-corelib/caches" 
1113	"github.com/jf-tech/go-corelib/strs" 
1214
1315	"github.com/jf-tech/omniparser/nodes" 
@@ -62,20 +64,92 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac
6264	}
6365}
6466
65- func  javascript (_  * transformctx.Ctx , n  * node.Node , js  string , args  ... string ) (string , error ) {
67+ // For debugging/testing purpose so we can easily disable all the caches. But not exported. We always 
68+ // want caching in production. 
69+ var  disableCache  =  false 
70+ 71+ // JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used 
72+ // across multiple goroutines and across different *goja.Runtime. 
73+ var  JSProgramCache  =  caches .NewLoadingCache () // per schema so won't have too many, no need to put a hard cap. 
74+ // JSRuntimeCache caches *goja.Runtime. A *goja.Runtime is a javascript VM. It can *not* be shared 
75+ // across multiple goroutines. 
76+ var  JSRuntimeCache  =  caches .NewLoadingCache (100 ) // per transform, plus expensive, a smaller cap. 
77+ // NodeToJSONCache caches *node.Node tree to translated JSON string. 
78+ var  NodeToJSONCache  =  caches .NewLoadingCache (100 ) // per transform, plus expensive, a smaller cap. 
79+ 80+ func  getProgram (js  string ) (* goja.Program , error ) {
81+ 	if  disableCache  {
82+ 		return  goja .Compile ("" , js , false )
83+ 	}
84+ 	p , err  :=  JSProgramCache .Get (js , func (interface {}) (interface {}, error ) {
85+ 		return  goja .Compile ("" , js , false )
86+ 	})
87+ 	if  err  !=  nil  {
88+ 		return  nil , err 
89+ 	}
90+ 	return  p .(* goja.Program ), nil 
91+ }
// ptrAddrStr formats the address held by p as a lowercase hexadecimal string without any
// "0x" prefix; a nil pointer yields "0".
func ptrAddrStr(p unsafe.Pointer) string {
	const hexBase = 16
	addr := uint64(uintptr(p))
	return strconv.FormatUint(addr, hexBase)
}
96+ 97+ func  getRuntime (ctx  * transformctx.Ctx ) * goja.Runtime  {
98+ 	if  disableCache  {
99+ 		return  goja .New ()
100+ 	}
101+ 	// a VM can be reused as long as not across thread. We don't have access to 
102+ 	// thread/goroutine id (nor do we want to use some hack to get it, see 
103+ 	// https://golang.org/doc/faq#no_goroutine_id). Instead, we use ctx as an 
104+ 	// indicator - omniparser runs on a single thread per transform. And ctx is 
105+ 	// is per transform. 
106+ 	addr  :=  ptrAddrStr (unsafe .Pointer (ctx ))
107+ 	vm , _  :=  JSRuntimeCache .Get (addr , func (interface {}) (interface {}, error ) {
108+ 		return  goja .New (), nil 
109+ 	})
110+ 	return  vm .(* goja.Runtime )
111+ }
112+ 113+ func  getNodeJSON (n  * node.Node ) string  {
114+ 	if  disableCache  {
115+ 		return  nodes .JSONify2 (n )
116+ 	}
117+ 	addr  :=  ptrAddrStr (unsafe .Pointer (n ))
118+ 	j , _  :=  NodeToJSONCache .Get (addr , func (interface {}) (interface {}, error ) {
119+ 		return  nodes .JSONify2 (n ), nil 
120+ 	})
121+ 	return  j .(string )
122+ }
123+ 124+ // javascriptWithContext is a custom_func that runs a javascript with optional arguments and 
125+ // with current node JSON, if the context node is provided. 
126+ func  javascriptWithContext (ctx  * transformctx.Ctx , n  * node.Node , js  string , args  ... string ) (string , error ) {
66127	if  len (args )% 2  !=  0  {
67128		return  "" , errors .New ("invalid number of args to 'javascript'" )
68129	}
69- 	vm  :=  goja .New ()
70- 	vm .Set (argNameNode , nodes .JSONify2 (n ))
130+ 	program , err  :=  getProgram (js )
131+ 	if  err  !=  nil  {
132+ 		return  "" , fmt .Errorf ("invalid javascript: %s" , err .Error ())
133+ 	}
134+ 	runtime  :=  getRuntime (ctx )
135+ 	var  varnames  []string 
136+ 	defer  func () {
137+ 		for  i  :=  range  varnames  {
138+ 			runtime .Set (varnames [i ], nil )
139+ 		}
140+ 	}()
71141	for  i  :=  0 ; i  <  len (args )/ 2 ; i ++  {
72- 		n ,  v , err  :=  parseArgTypeAndValue (args [i * 2 ], args [i * 2 + 1 ])
142+ 		varname ,  val , err  :=  parseArgTypeAndValue (args [i * 2 ], args [i * 2 + 1 ])
73143		if  err  !=  nil  {
74144			return  "" , err 
75145		}
76- 		vm .Set (n , v )
146+ 		runtime .Set (varname , val )
147+ 		varnames  =  append (varnames , varname )
148+ 	}
149+ 	if  n  !=  nil  {
150+ 		runtime .Set (argNameNode , getNodeJSON (n ))
77151	}
78- 	v , err  :=  vm . RunString ( js )
152+ 	v , err  :=  runtime . RunProgram ( program )
79153	if  err  !=  nil  {
80154		return  "" , err 
81155	}
@@ -86,3 +160,9 @@ func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (s
86160		return  v .String (), nil 
87161	}
88162}
163+ 164+ // javascript is a custom_func that runs a javascript with optional arguments and without context 
165+ // node JSON provided. 
166+ func  javascript (ctx  * transformctx.Ctx , js  string , args  ... string ) (string , error ) {
167+ 	return  javascriptWithContext (ctx , nil , js , args ... )
168+ }
0 commit comments