Yieldable LPEG Parser
[
Date Prev][
Date Next][
Thread Prev][
Thread Next]
[
Date Index]
[
Thread Index]
- Subject: Yieldable LPEG Parser
- From: William Ahern <william@...>
- Date: 2012年1月31日 22:01:23 -0800
Below is a _preliminary_ patch to make lpeg yieldable. It Works For Me(tm),
but I haven't hammered it yet.
Instead of passing a string to lpeg.match, you can pass an object (userdata
or table). The object must have a "tovector" method, which returns a tuple:
[light]userdata or string, length, end-of-string indicator. The method is
called whenever the engine gets to the end of the current vector and the
end-of-string indicator is still false. It calls tovector again and expects
a vector that is at least one more byte longer or end-of-string indicator
set. The source memory address can be different.
Crucially, the tovector method can yield. (See checkvector() and
growvector() in the patched file for how tovector is called.)
This allows parsing a non-blocking source (raw socket, http body, etc)
entirely online using a dynamic buffer.
Please let me know if there's any interest in this, either as a standalone
patch or, ideally, as something to be integrated into the upstream LPEG
project. If there's interest in merging it upstream I'd be happy to spend
time appeasing Roberto (or whomever the maintainer is), including making it
at least compilable under Lua 5.1, refactoring the way state is stored, and
experiment with a better interface (e.g. different ways to acquire vector
sources, or having a restartable match routine instead of a yieldable one,
which would be make it useable from Lua 5.1 and LuaJIT.)
Otherwise I'm just gonna fork and integrate it into my cqueues (continuation
queues using epoll/kqueue) network I/O library.
Apologies for the indentation issues. I can fix that, too.
--- ../lpeg-0.10.2/lpeg.c	2011年02月16日 07:03:25.000000000 -0800
+++ ./lpeg.c	2012年01月31日 21:06:03.000000000 -0800
@@ -10,6 +10,7 @@
 #include <ctype.h>
 #include <limits.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <string.h>
 
 #include "lua.h"
@@ -23,6 +24,14 @@
 #define MAXSTACKIDX	"lpeg-maxstack"
 
 
+#define SAY_(file, func, line, fmt, ...) \
+ fprintf(stderr, "%s:%d: " fmt "%s", __func__, __LINE__, __VA_ARGS__)
+
+#define SAY(...) SAY_(__FILE__, __func__, __LINE__, __VA_ARGS__, "\n")
+
+#define HAI SAY("hai")
+
+
 /*
 ** compatibility with Lua 5.2
 */
@@ -72,12 +81,26 @@
 /* index, on Lua stack, for capture list */
 #define caplistidx(ptop)	((ptop) + 2)
 
+/* index, on Lua stack, for capture list size */
+#define capsizeidx(ptop)	((ptop) + 3)
+
+/* index, on Lua stack, for capture top */
+#define captopidx(ptop)	((ptop) + 4)
+
 /* index, on Lua stack, for pattern's fenv */
-#define penvidx(ptop)	((ptop) + 3)
+#define penvidx(ptop)	((ptop) + 5)
 
 /* index, on Lua stack, for backtracking stack */
-#define stackidx(ptop)	((ptop) + 4)
+#define stackidx(ptop)	((ptop) + 6)
+
+/* index, on Lua stack, for backtracking stack size */
+#define stsizeidx(ptop)	((ptop) + 7)
 
+/* index, on Lua stack, for backtracking stack size */
+#define sttopidx(ptop)	((ptop) + 8)
+
+/* index, on Lua stack, for instruction pointer */
+#define instidx(ptop)	((ptop) + 9)
 
 
 typedef unsigned char byte;
@@ -183,8 +206,16 @@
 #define iscapnosize(k)	((k) == Cposition || (k) == Cconst)
 
 
+#define NULLSIDX SIZE_MAX
+#define getsidx(o, s) ((s) - (o))
+
+static const char *getsptr(const char *o, size_t s) {
+	return (s == NULLSIDX)? (char *)0 : &o[s];
+} /* getsptr() */
+
 typedef struct Capture {
- const char *s; /* position */
+// const char *s; /* position */
+ size_t s;
 short idx;
 byte kind;
 byte siz;
@@ -375,7 +406,8 @@
 
 
 typedef struct Stack {
- const char *s;
+// const char *s;
+ size_t s;
 const Instruction *p;
 int caplevel;
 } Stack;
@@ -388,18 +420,55 @@
 const char *o, const char *s, int ptop);
 
 
-static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
+static Capture *growcap (lua_State *L, Capture *cap, int captop, int fac, int ptop) {
 Capture *newc;
- if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
+ if (captop >= INT_MAX/((int)sizeof(Capture) * fac))
 luaL_error(L, "too many captures");
- newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
+ newc = (Capture *)lua_newuserdata(L, captop * fac * sizeof(Capture));
 memcpy(newc, cap, captop * sizeof(Capture));
 lua_replace(L, caplistidx(ptop));
+ lua_pushinteger(L, captop * fac);
+ lua_replace(L, capsizeidx(ptop));
 return newc;
 }
 
 
-static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
+static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
+ return growcap(L, cap, captop, 2, ptop);
+}
+
+
+static int getcapsize(lua_State *L, int ptop) {
+	return lua_tointeger(L, capsizeidx(ptop));
+} /* getcapsize() */
+
+
+static int getcaptop(lua_State *L, int ptop) {
+	return lua_tointeger(L, captopidx(ptop));
+} /* getcaptop() */
+
+
+static int setcaptop(lua_State *L, int captop, int ptop) {
+	lua_pushinteger(L, captop);
+	lua_replace(L, captopidx(ptop));
+	return captop;
+} /* setcaptop() */
+
+
+static Capture *getcaplist(lua_State *L, int ptop) {
+	return (Capture *)lua_touserdata(L, caplistidx(ptop));
+} /* getcaplist() */
+
+
+static Capture *movecap(lua_State *L, Capture *cap, int ptop) {
+	if (lua_type(L, caplistidx(ptop)) == LUA_TLIGHTUSERDATA) {
+		return growcap(L, cap, getcapsize(L, ptop), 1, ptop);
+	} else
+		return cap;
+} /* movecap() */
+
+
+static Stack *growstack (lua_State *L, Stack **stacklimit, int fac, int ptop) {
 Stack *stack = getstackbase(L, ptop);
 Stack *newstack;
 int n = *stacklimit - stack;
@@ -407,50 +476,103 @@
 lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
 max = lua_tointeger(L, -1);
 lua_pop(L, 1);
- if (n >= max)
+ if (fac > 1 && n >= max)
 luaL_error(L, "too many pending calls/choices");
- newn = 2*n; if (newn > max) newn = max;
+ newn = fac*n; if (newn > max) newn = max;
 newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
 memcpy(newstack, stack, n * sizeof(Stack));
 lua_replace(L, stackidx(ptop));
+ lua_pushinteger(L, newn);
+ lua_replace(L, stsizeidx(ptop));
 *stacklimit = newstack + newn;
 return newstack + n;
+}
 
+
+static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
+ return growstack(L, stacklimit, 2, ptop);
 }
 
 
-static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
+static int getstacksize(lua_State *L, int ptop) {
+	return lua_tointeger(L, stsizeidx(ptop));
+} /* getstacksize() */
+
+
+static Stack *getstacklimit(lua_State *L, int ptop) {
+	return &getstackbase(L, ptop)[getstacksize(L, ptop)];
+} /* getstacklimit() */
+
+
+static Stack *getstacktop(lua_State *L, int ptop) {
+	return &getstackbase(L, ptop)[lua_tointeger(L, sttopidx(ptop))];
+} /* getstacktop() */
+
+
+static int setstacktop(lua_State *L, Stack *stack, int ptop) {
+	int sttop = stack - getstackbase(L, ptop);
+	lua_pushinteger(L, sttop);
+	lua_replace(L, sttopidx(ptop));
+	return sttop;
+} /* setstacktop() */
+
+
+static Stack *movestack(lua_State *L, Stack *stack, Stack **stacklimit, int ptop) {
+	if (lua_type(L, stackidx(ptop)) == LUA_TLIGHTUSERDATA) {
+		setstacktop(L, stack, ptop);
+		growstack(L, stacklimit, 1, ptop);
+		return getstacktop(L, ptop);
+	} else
+		return stack;
+} /* movestack() */
+
+
+static Instruction *getip(lua_State *L, int ptop) {
+	return lua_touserdata(L, instidx(ptop));
+} /* getip() */
+
+
+static const Instruction *setip(lua_State *L, const Instruction *p, int ptop) {
+	lua_pushlightuserdata(L, (void *)p);
+	lua_replace(L, instidx(ptop));
+	return p;
+} /* setip() */
+
+
+static void adddyncaptures (const char *o, const char *s, Capture *base, int n, int fd) {
 int i;
 assert(base[0].kind == Cruntime && base[0].siz == 0);
 base[0].idx = fd; /* first returned capture */
 for (i = 1; i < n; i++) { /* add extra captures */
 base[i].siz = 1; /* mark it as closed */
- base[i].s = s;
+ base[i].s = getsidx(o, s);
 base[i].kind = Cruntime;
 base[i].idx = fd + i; /* stack index */
 }
 base[n].kind = Cclose; /* add closing entry */
 base[n].siz = 1;
- base[n].s = s;
+ base[n].s = getsidx(o, s);
 }
 
+static _Bool growvector(lua_State *L, const char **_o, const char **_s, const char **_e, _Bool *eos, Stack **stack, Stack **stacklimit, Capture **cap, int captop, const Instruction *p, int ptop);
 
 #define condfailed(p)	{ int f = p->i.offset; if (f) p+=f; else goto fail; }
+#define more() (!_eos && growvector(L, &_o, &_s, &_e, &_eos, &stack, &stacklimit, &capture, captop, p, ptop))
+#define onemore() ((_s < _e) || more())
 
 static const char *match (lua_State *L,
- const char *o, const char *s, const char *e,
- Instruction *op, Capture *capture, int ptop) {
- Stack stackbase[INITBACK];
- Stack *stacklimit = stackbase + INITBACK;
- Stack *stack = stackbase; /* point to first empty slot in stack */
- int capsize = INITCAPSIZE;
- int captop = 0; /* point to first empty slot in captures */
- const Instruction *p = op;
- stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
- lua_pushlightuserdata(L, stackbase);
+ const char *_o, const char *_s, const char *_e, _Bool _eos,
+ int ptop) {
+ Stack *stack = getstacktop(L, ptop);
+ Stack *stacklimit = getstacklimit(L, ptop);
+ Capture *capture = getcaplist(L, ptop);
+ int capsize = getcapsize(L, ptop);
+ int captop = getcaptop(L, ptop); /* point to first empty slot in captures */
+ const Instruction *p = getip(L, ptop);
 for (;;) {
+next:
 #if defined(DEBUG)
- printf("s: |%s| stck: %d c: %d ",
+ printf("s: |%s| stck: %ld c: %d ",
 s, stack - getstackbase(L, ptop), captop);
 printinst(op, p);
 #endif
@@ -458,53 +580,55 @@
 case IEnd: {
 assert(stack == getstackbase(L, ptop) + 1);
 capture[captop].kind = Cclose;
- capture[captop].s = NULL;
- return s;
+ capture[captop].s = NULLSIDX;
+ return _s;
 }
 case IGiveup: {
 assert(stack == getstackbase(L, ptop));
 return NULL;
 }
 case IRet: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULLSIDX);
 p = (--stack)->p;
 continue;
 }
 case IAny: {
 int n = p->i.aux;
- if (n <= e - s) { p++; s += n; }
- else condfailed(p);
+ do {
+ if (n <= _e - _s) { p++; _s += n; goto next; }
+ } while (more());
+ condfailed(p);
 continue;
 }
 case IChar: {
- if ((byte)*s == p->i.aux && s < e) { p++; s++; }
- else condfailed(p);
+ if (onemore() && (byte)*_s == p->i.aux)
+ { p++; _s++; }
+	else condfailed(p);
 continue;
 }
 case ISet: {
- int c = (byte)*s;
- if (testchar((p+1)->buff, c) && s < e)
- { p += CHARSETINSTSIZE; s++; }
+ if (onemore() && testchar((p+1)->buff, (byte)*_s))
+ { p += CHARSETINSTSIZE; _s++; }
 else condfailed(p);
 continue;
 }
 case IBack: {
 int n = p->i.aux;
- if (n > s - o) goto fail;
- s -= n; p++;
+ if (n > _s - _o) goto fail;
+ _s -= n; p++;
 continue;
 }
 case ISpan: {
- for (; s < e; s++) {
- int c = (byte)*s;
+ for (; onemore(); _s++) {
+ int c = (byte)*_s;
 if (!testchar((p+1)->buff, c)) break;
 }
 p += CHARSETINSTSIZE;
 continue;
 }
 case IFunc: {
- const char *r = (p+1)->f(s, e, o, (p+2)->buff);
- if (r != NULL) { s = r; p += funcinstsize(p); }
+ const char *r = (p+1)->f(_s, _e, _o, (p+2)->buff);
+ if (r != NULL) { _s = r; p += funcinstsize(p); }
 else condfailed(p);
 continue;
 }
@@ -516,7 +640,7 @@
 if (stack == stacklimit)
 stack = doublestack(L, &stacklimit, ptop);
 stack->p = dest(0, p);
- stack->s = s - p->i.aux;
+ stack->s = getsidx(_o, _s - p->i.aux);
 stack->caplevel = captop;
 stack++;
 p++;
@@ -525,28 +649,28 @@
 case ICall: {
 if (stack == stacklimit)
 stack = doublestack(L, &stacklimit, ptop);
- stack->s = NULL;
+ stack->s = NULLSIDX;
 stack->p = p + 1; /* save return address */
 stack++;
 p += p->i.offset;
 continue;
 }
 case ICommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULLSIDX);
 stack--;
 p += p->i.offset;
 continue;
 }
 case IPartialCommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- (stack - 1)->s = s;
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULLSIDX);
+ (stack - 1)->s = getsidx(_o, _s);
 (stack - 1)->caplevel = captop;
 p += p->i.offset;
 continue;
 }
 case IBackCommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- s = (--stack)->s;
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULLSIDX);
+ _s = getsptr(_o, (--stack)->s);
 captop = stack->caplevel;
 p += p->i.offset;
 continue;
@@ -559,15 +683,15 @@
 fail: { /* pattern failed: try to backtrack */
 do { /* remove pending calls */
 assert(stack > getstackbase(L, ptop));
- s = (--stack)->s;
- } while (s == NULL);
+ _s = getsptr(_o, (--stack)->s);
+ } while (_s == NULL);
 captop = stack->caplevel;
 p = stack->p;
 continue;
 }
 case ICloseRunTime: {
 int fr = lua_gettop(L) + 1; /* stack index of first result */
- int ncap = runtimecap(L, capture + captop, capture, o, s, ptop);
+ int ncap = runtimecap(L, capture + captop, capture, _o, _s, ptop);
 lua_Integer res = lua_tointeger(L, fr) - 1; /* offset */
 int n = lua_gettop(L) - fr; /* number of new captures */
 if (res == -1) { /* may not be a number */
@@ -576,11 +700,11 @@
 goto fail; /* and fail */
 }
 else if (lua_isboolean(L, fr)) /* true? */
- res = s - o; /* keep current position */
+ res = _s - _o; /* keep current position */
 }
- if (res < s - o || res > e - o)
+ if (res < _s - _o || res > _e - _o)
 luaL_error(L, "invalid position returned by match-time capture");
- s = o + res; /* update current position */
+ _s = _o + res; /* update current position */
 captop -= ncap; /* remove nested captures */
 lua_remove(L, fr); /* remove first result (offset) */
 if (n > 0) { /* captures? */
@@ -588,17 +712,17 @@
 capture = doublecap(L, capture, captop, ptop);
 capsize = 2 * captop;
 }
- adddyncaptures(s, capture + captop - n - 1, n, fr);
+ adddyncaptures(_o, _s, capture + captop - n - 1, n, fr);
 }
 p++;
 continue;
 }
 case ICloseCapture: {
- const char *s1 = s - getoff(p);
+ const char *s1 = _s - getoff(p);
 assert(captop > 0);
 if (capture[captop - 1].siz == 0 &&
- s1 - capture[captop - 1].s < UCHAR_MAX) {
- capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
+ getsidx(_o, s1) - capture[captop - 1].s < UCHAR_MAX) {
+ capture[captop - 1].siz = getsidx(_o, s1) - capture[captop - 1].s + 1;
 p++;
 continue;
 }
@@ -616,7 +740,7 @@
 case IFullCapture:
 capture[captop].siz = getoff(p) + 1; /* save capture size */
 capture: {
- capture[captop].s = s - getoff(p);
+ capture[captop].s = getsidx(_o, _s - getoff(p));
 capture[captop].idx = p->i.offset;
 capture[captop].kind = getkind(p);
 if (++captop >= capsize) {
@@ -668,7 +792,8 @@
 if (backtop >= MAXBACKVER)
 return luaL_error(L, "too many pending calls/choices");
 back[backtop].p = dest(0, p);
- back[backtop++].s = dummy;
+// back[backtop++].s = dummy;
+ back[backtop++].s = 0;
 p++;
 continue;
 }
@@ -676,7 +801,7 @@
 assert((p + 1)->i.code != IRet); /* no tail call */
 if (backtop >= MAXBACKVER)
 return luaL_error(L, "too many pending calls/choices");
- back[backtop].s = NULL;
+ back[backtop].s = NULLSIDX;
 back[backtop++].p = p + 1;
 goto dojmp;
 }
@@ -685,12 +810,12 @@
 if (postable == 0) /* grammar still not fixed? */
 goto fail; /* to be verified later */
 for (i = 0; i < backtop; i++) {
- if (back[i].s == NULL && back[i].p == p + 1)
+ if (back[i].s == NULLSIDX && back[i].p == p + 1)
 return luaL_error(L, "%s is left recursive", val2str(L, rule));
 }
 if (backtop >= MAXBACKVER)
 return luaL_error(L, "too many pending calls/choices");
- back[backtop].s = NULL;
+ back[backtop].s = NULLSIDX;
 back[backtop++].p = p + 1;
 p = op + getposition(L, postable, p->i.offset);
 continue;
@@ -753,7 +878,7 @@
 do {
 if (backtop-- == 0)
 return 1; /* no more backtracking */
- } while (back[backtop].s == NULL);
+ } while (back[backtop].s == NULLSIDX);
 p = back[backtop].p;
 continue;
 }
@@ -1836,7 +1961,8 @@
 Capture *ocap; /* (original) capture list */
 lua_State *L;
 int ptop; /* index of last argument to 'match' */
- const char *s; /* original string */
+// const char *s; /* original string */
+ size_t s;
 int valuecached; /* value stored in cache slot */
 } CapState;
 
@@ -1845,14 +1971,14 @@
 
 #define isclosecap(cap)	(captype(cap) == Cclose)
 
-#define closeaddr(c)	((c)->s + (c)->siz - 1)
+#define closeaddr(c, _o)	getsptr((_o), ((c)->s + (c)->siz - 1))
 
 #define isfullcap(cap)	((cap)->siz != 0)
 
 #define getfromenv(cs,v)	lua_rawgeti((cs)->L, penvidx((cs)->ptop), v)
 #define pushluaval(cs)		getfromenv(cs, (cs)->cap->idx)
 
-#define pushsubject(cs, c) lua_pushlstring((cs)->L, (c)->s, (c)->siz - 1)
+#define pushsubject(cs, c, _o) lua_pushlstring((cs)->L, (_o) + (c)->s, (c)->siz - 1)
 
 
 #define updatecache(cs,v) { if ((v) != (cs)->valuecached) updatecache_(cs,v); }
@@ -1865,7 +1991,7 @@
 }
 
 
-static int pushcapture (CapState *cs);
+static int pushcapture (CapState *cs, const char *_o);
 
 
 static Capture *findopen (Capture *cap) {
@@ -1894,17 +2020,17 @@
 }
 
 
-static int pushallvalues (CapState *cs, int addextra) {
+static int pushallvalues (CapState *cs, int addextra, const char *_o) {
 Capture *co = cs->cap;
 int n = 0;
 if (isfullcap(cs->cap++)) {
- pushsubject(cs, co); /* push whole match */
+ pushsubject(cs, co, _o); /* push whole match */
 return 1;
 }
 while (!isclosecap(cs->cap))
- n += pushcapture(cs);
+ n += pushcapture(cs, _o);
 if (addextra || n == 0) { /* need extra? */
- lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
+ lua_pushlstring(cs->L, getsptr(_o, co->s), cs->cap->s - co->s); /* push whole match */
 n++;
 }
 cs->cap++; /* skip close entry */
@@ -1937,18 +2063,18 @@
 }
 
 
-static int backrefcap (CapState *cs) {
+static int backrefcap (CapState *cs, const char *_o) {
 int n;
 Capture *curr = cs->cap;
 pushluaval(cs); /* reference name */
 cs->cap = findback(cs, curr);
- n = pushallvalues(cs, 0);
+ n = pushallvalues(cs, 0, _o);
 cs->cap = curr + 1;
 return n;
 }
 
 
-static int tablecap (CapState *cs) {
+static int tablecap (CapState *cs, const char *_o) {
 lua_State *L = cs->L;
 int n = 0;
 lua_newtable(L);
@@ -1958,7 +2084,7 @@
 if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
 int k;
 pushluaval(cs); /* push group name */
- k = pushallvalues(cs, 0);
+ k = pushallvalues(cs, 0, _o);
 if (k == 0) { /* no value? */
 lua_pop(L, 1); /* remove group name */
 continue; /* and go on */
@@ -1969,7 +2095,7 @@
 }
 else {
 int i;
- int k = pushcapture(cs);
+ int k = pushcapture(cs, _o);
 for (i = k; i > 0; i--)
 lua_rawseti(L, -(i + 1), n + i);
 n += k;
@@ -1980,9 +2106,9 @@
 }
 
 
-static int querycap (CapState *cs) {
+static int querycap (CapState *cs, const char *_o) {
 int idx = cs->cap->idx;
- int n = pushallvalues(cs, 0);
+ int n = pushallvalues(cs, 0, _o);
 if (n > 1) /* extra captures? */
 lua_pop(cs->L, n - 1); /* throw them away */
 updatecache(cs, idx);
@@ -1996,11 +2122,11 @@
 }
 
 
-static int foldcap (CapState *cs) {
+static int foldcap (CapState *cs, const char *_o) {
 int n;
 lua_State *L = cs->L;
 int idx = cs->cap->idx;
- if (isfullcap(cs->cap++) || isclosecap(cs->cap) || (n = pushcapture(cs)) == 0)
+ if (isfullcap(cs->cap++) || isclosecap(cs->cap) || (n = pushcapture(cs, _o)) == 0)
 return luaL_error(L, "no initial value for fold capture");
 if (n > 1)
 lua_pop(L, n - 1); /* leave only one result */
@@ -2008,7 +2134,7 @@
 updatecache(cs, idx);
 lua_pushvalue(L, subscache(cs)); /* get folding function */
 lua_insert(L, -2); /* put it before accumulator */
- n = pushcapture(cs); /* get other captures */
+ n = pushcapture(cs, _o); /* get other captures */
 lua_call(L, n + 1, 1); /* call folding function */
 }
 cs->cap++; /* skip close entry */
@@ -2016,11 +2142,11 @@
 }
 
 
-static int functioncap (CapState *cs) {
+static int functioncap (CapState *cs, const char *_o) {
 int n;
 int top = lua_gettop(cs->L);
 pushluaval(cs);
- n = pushallvalues(cs, 0);
+ n = pushallvalues(cs, 0, _o);
 lua_call(cs->L, n, LUA_MULTRET);
 return lua_gettop(cs->L) - top;
 }
@@ -2033,14 +2159,14 @@
 Capture *open = findopen(close);
 assert(captype(open) == Cruntime);
 close->kind = Cclose;
- close->s = s;
+ close->s = getsidx(o, s);
 cs.ocap = ocap; cs.cap = open; cs.L = L;
- cs.s = o; cs.valuecached = 0; cs.ptop = ptop;
+ cs.s = getsidx(o, o); cs.valuecached = 0; cs.ptop = ptop;
 luaL_checkstack(L, 4, "too many runtime captures");
 pushluaval(&cs);
 lua_pushvalue(L, SUBJIDX); /* push original subject */
 lua_pushinteger(L, s - o + 1); /* current position */
- n = pushallvalues(&cs, 0);
+ n = pushallvalues(&cs, 0, o);
 lua_call(L, n + 2, LUA_MULTRET);
 return close - open;
 }
@@ -2052,15 +2178,17 @@
 union {
 Capture *cp;
 struct {
- const char *s;
- const char *e;
+// const char *s;
+// const char *e;
+ size_t s;
+ size_t e;
 } s;
 } u;
 } StrAux;
 
 #define MAXSTRCAPS	10
 
-static int getstrcaps (CapState *cs, StrAux *cps, int n) {
+static int getstrcaps (CapState *cs, StrAux *cps, int n, const char *_o) {
 int k = n++;
 cps[k].isstring = 1;
 cps[k].u.s.s = cs->cap->s;
@@ -2069,7 +2197,7 @@
 if (n >= MAXSTRCAPS) /* too many captures? */
 cs->cap = nextcap(cs->cap); /* skip it */
 else if (captype(cs->cap) == Csimple)
- n = getstrcaps(cs, cps, n);
+ n = getstrcaps(cs, cps, n, _o);
 else {
 cps[n].isstring = 0;
 cps[n].u.cp = cs->cap;
@@ -2079,7 +2207,7 @@
 }
 cs->cap++; /* skip close */
 }
- cps[k].u.s.e = closeaddr(cs->cap - 1);
+ cps[k].u.s.e = getsidx(_o, closeaddr(cs->cap - 1, _o));
 return n;
 }
 
@@ -2087,17 +2215,17 @@
 /*
 ** add next capture (which should be a string) to buffer
 */
-static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what, const char *_o);
 
 
-static void stringcap (luaL_Buffer *b, CapState *cs) {
+static void stringcap (luaL_Buffer *b, CapState *cs, const char *_o) {
 StrAux cps[MAXSTRCAPS];
 int n;
 size_t len, i;
 const char *c;
 updatecache(cs, cs->cap->idx);
 c = lua_tolstring(cs->L, subscache(cs), &len);
- n = getstrcaps(cs, cps, 0) - 1;
+ n = getstrcaps(cs, cps, 0, _o) - 1;
 for (i = 0; i < len; i++) {
 if (c[i] != '%' || c[++i] < '0' || c[i] > '9')
 luaL_addchar(b, c[i]);
@@ -2106,11 +2234,11 @@
 if (l > n)
 luaL_error(cs->L, "invalid capture index (%d)", l);
 else if (cps[l].isstring)
- luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
+ luaL_addlstring(b, getsptr(_o, cps[l].u.s.s), cps[l].u.s.e - cps[l].u.s.s);
 else {
 Capture *curr = cs->cap;
 cs->cap = cps[l].u.cp;
- if (addonestring(b, cs, "capture") == 0)
+ if (addonestring(b, cs, "capture", _o) == 0)
 luaL_error(cs->L, "no values in capture index %d", l);
 cs->cap = curr;
 }
@@ -2119,37 +2247,37 @@
 }
 
 
-static void substcap (luaL_Buffer *b, CapState *cs) {
- const char *curr = cs->cap->s;
+static void substcap (luaL_Buffer *b, CapState *cs, const char *_o) {
+ const char *curr = getsptr(_o, cs->cap->s);
 if (isfullcap(cs->cap)) /* no nested captures? */
 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
 else {
 cs->cap++;
 while (!isclosecap(cs->cap)) {
- const char *next = cs->cap->s;
+ const char *next = getsptr(_o, cs->cap->s);
 luaL_addlstring(b, curr, next - curr); /* add text up to capture */
- if (addonestring(b, cs, "replacement") == 0) /* no capture value? */
+ if (addonestring(b, cs, "replacement", _o) == 0) /* no capture value? */
 curr = next; /* keep original text in final result */
 else
- curr = closeaddr(cs->cap - 1); /* continue after match */
+ curr = closeaddr(cs->cap - 1, _o); /* continue after match */
 }
- luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
+ luaL_addlstring(b, curr, getsptr(_o, cs->cap->s) - curr); /* add last piece of text */
 }
 cs->cap++; /* go to next capture */
 }
 
 
-static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what, const char *_o) {
 switch (captype(cs->cap)) {
 case Cstring:
- stringcap(b, cs); /* add capture directly to buffer */
+ stringcap(b, cs, _o); /* add capture directly to buffer */
 return 1;
 case Csubst:
- substcap(b, cs); /* add capture directly to buffer */
+ substcap(b, cs, _o); /* add capture directly to buffer */
 return 1;
 default: {
 lua_State *L = cs->L;
- int n = pushcapture(cs);
+ int n = pushcapture(cs, _o);
 if (n > 0) {
 if (n > 1) lua_pop(L, n - 1); /* only one result */
 if (!lua_isstring(L, -1))
@@ -2162,7 +2290,7 @@
 }
 
 
-static int pushcapture (CapState *cs) {
+static int pushcapture (CapState *cs, const char *_o) {
 luaL_checkstack(cs->L, 4, "too many captures");
 switch (captype(cs->cap)) {
 case Cposition: {
@@ -2183,7 +2311,7 @@
 return 1;
 }
 case Csimple: {
- int k = pushallvalues(cs, 1);
+ int k = pushallvalues(cs, 1, _o);
 if (k > 1)
 lua_insert(cs->L, -k); /* whole match is first result */
 return k;
@@ -2200,30 +2328,30 @@
 case Cstring: {
 luaL_Buffer b;
 luaL_buffinit(cs->L, &b);
- stringcap(&b, cs);
+ stringcap(&b, cs, _o);
 luaL_pushresult(&b);
 return 1;
 }
 case Csubst: {
 luaL_Buffer b;
 luaL_buffinit(cs->L, &b);
- substcap(&b, cs);
+ substcap(&b, cs, _o);
 luaL_pushresult(&b);
 return 1;
 }
 case Cgroup: {
 if (cs->cap->idx == 0) /* anonymous group? */
- return pushallvalues(cs, 0); /* add all nested values */
+ return pushallvalues(cs, 0, _o); /* add all nested values */
 else { /* named group: add no values */
 cs->cap = nextcap(cs->cap); /* skip capture */
 return 0;
 }
 }
- case Cbackref: return backrefcap(cs);
- case Ctable: return tablecap(cs);
- case Cfunction: return functioncap(cs);
- case Cquery: return querycap(cs);
- case Cfold: return foldcap(cs);
+ case Cbackref: return backrefcap(cs, _o);
+ case Ctable: return tablecap(cs, _o);
+ case Cfunction: return functioncap(cs, _o);
+ case Cquery: return querycap(cs, _o);
+ case Cfold: return foldcap(cs, _o);
 default: assert(0); return 0;
 }
 }
@@ -2235,9 +2363,9 @@
 if (!isclosecap(capture)) { /* is there any capture? */
 CapState cs;
 cs.ocap = cs.cap = capture; cs.L = L;
- cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
+ cs.s = getsidx(s, s); cs.valuecached = 0; cs.ptop = ptop;
 do { /* collect their values */
- n += pushcapture(&cs);
+ n += pushcapture(&cs, s);
 } while (!isclosecap(cs.cap));
 }
 if (n == 0) { /* no capture values? */
@@ -2327,12 +2455,110 @@
 }
 
 
-static int matchl (lua_State *L) {
+static int match_k(lua_State *L);
+
+static void popvector(lua_State *L, const char **s, size_t *l, _Bool *eos) {
+	switch (lua_type(L, -3)) {
+	case LUA_TUSERDATA:
+		/* FALL THROUGH */
+	case LUA_TLIGHTUSERDATA:
+		*s = lua_touserdata(L, -3);
+		*l = lua_tounsigned(L, -2);
+		*eos = lua_toboolean(L, -1);
+		lua_pop(L, 3);
+		break;
+	case LUA_TSTRING:
+		*s = lua_tostring(L, -3);
+		*l = lua_tounsigned(L, -2);
+		*eos = lua_toboolean(L, -1);
+		lua_pop(L, 3);
+		break;
+	default:
+		luaL_argerror(L, SUBJIDX, "invalid :tovector results");
+		*s = 0; /* silence compiler */
+		*l = 0; /* silence compiler */
+	}
+} /* popvector() */
+
+
+static _Bool growvector(lua_State *L, const char **_o, const char **_s, const char **_e, _Bool *eos, Stack **stack, Stack **stacklimit, Capture **cap, int captop, const Instruction *p, int ptop) {
+	size_t sidx = getsidx(*_o, *_s), eidx = getsidx(*_o, *_e);
+	size_t l;
+
+	*stack = movestack(L, *stack, stacklimit, ptop);
+	setstacktop(L, *stack, ptop);
+	*cap = movecap(L, *cap, ptop);
+	setcaptop(L, captop, ptop);
+	setip(L, p, ptop);
+	lua_pushunsigned(L, sidx);
+
+	lua_getfield(L, SUBJIDX, "tovector");
+	luaL_argcheck(L, lua_type(L, -1) == LUA_TFUNCTION, SUBJIDX, "user data object has no :tovector method");
+	lua_pushvalue(L, SUBJIDX);
+	lua_pushboolean(L, 1);
+	lua_callk(L, 2, 3, ptop, &match_k); /* expect ptr, len, and eos */
+	popvector(L, _o, &l, eos);
+	lua_pop(L, 1); /* sidx */
+	*_s = getsptr(*_o, sidx);
+	*_e = getsptr(*_o, l);
+	return (l > eidx); /* return whether grew or not */
+} /* growvector() */
+
+
+static const char *checkvector(lua_State *L, size_t *l, _Bool *eos) {
+	const char *s;
+
+	switch (lua_type(L, SUBJIDX)) {
+	case LUA_TSTRING:
+		*eos = 1;
+		return luaL_checklstring(L, SUBJIDX, l);
+	case LUA_TTABLE:
+		/* FALL THROUGH */
+	case LUA_TUSERDATA:
+		lua_getfield(L, SUBJIDX, "tovector");
+		luaL_argcheck(L, lua_type(L, -1) == LUA_TFUNCTION, SUBJIDX, "user data object has no :tovector method");
+		lua_pushvalue(L, SUBJIDX);
+		lua_pushboolean(L, 0);
+		lua_call(L, 2, 3); /* expect ptr, len, and eos */
+		popvector(L, &s, l, eos);
+		return s;
+	default:
+		luaL_argerror(L, SUBJIDX, "expected string or user data object");
+	}
+
+	*eos = 1; /* silence uninitialized warnings */
+
+	return 0;
+} /* checkvector() */
+
+
+static int match_k(lua_State *L) {
+ const char *o, *s, *r;
+ size_t l, sidx;
+ _Bool eos;
+ int ptop;
+ popvector(L, &o, &l, &eos);
+ sidx = lua_tounsigned(L, -1);
+ lua_pop(L, 1); /* pop sidx */
+ s = getsptr(o, sidx);
+ lua_getctx(L, &ptop);
+ r = match(L, o, s, o + l, eos, ptop);
+ if (r == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+ return getcaptures(L, o, r, ptop);
+} /* match_k() */
+
+
+static int match_l (lua_State *L) {
 Capture capture[INITCAPSIZE];
+ Stack stack[INITBACK];
 const char *r;
 size_t l;
+ _Bool eos;
 Instruction *p = getpatt(L, 1, NULL);
- const char *s = luaL_checklstring(L, SUBJIDX, &l);
+ const char *s = checkvector(L, &l, &eos);
 int ptop = lua_gettop(L);
 lua_Integer ii = luaL_optinteger(L, 3, 1);
 size_t i = (ii > 0) ?
@@ -2340,8 +2566,15 @@
 (((size_t)-ii <= l) ? l - ((size_t)-ii) : 0);
 lua_pushnil(L); /* subscache */
 lua_pushlightuserdata(L, capture); /* caplistidx */
+ lua_pushnumber(L, INITCAPSIZE); /* capsizeidx */
+ lua_pushnumber(L, 0); /* captopidx */
 lua_getfenv(L, 1); /* penvidx */
- r = match(L, s, s + i, s + l, p, capture, ptop);
+ lua_pushlightuserdata(L, stack); /* stackidx */
+ lua_pushnumber(L, INITBACK); /* stsizeidx */
+ lua_pushnumber(L, 1); /* sttopidx */
+ stack[0].p = &giveup; stack[0].s = i; stack[0].caplevel = 0;
+ lua_pushlightuserdata(L, p); /* instidx */
+ r = match(L, s, s + i, s + l, eos, ptop);
 if (r == NULL) {
 lua_pushnil(L);
 return 1;
@@ -2351,7 +2584,7 @@
 
 
 static struct luaL_Reg pattreg[] = {
- {"match", matchl},
+ {"match", match_l},
 {"print", printpat_l},
 {"locale", locale_l},
 {"setmaxstack", setmax},