String Query


stringquery is a string pattern matching and transformation library inspired partly by [jQuery].

Description

Primary design qualities are

The resemblance to jQuery is that we construct a query object containing a set of selections for the thing being studied; we then perform a series of chained method calls to refine and alter those selections (all at once, as a set); and finally we perform an operation on those selections (e.g. replace or return). See the examples in the test suite below.

Status

The design of this library is preliminary and originally intended only as an experiment. Improvements welcome.

Author

DavidManura

test_stringquery.lua

-- test_stringquery.lua
-- test of dmlib.stringquery.
local SQ = require "dmlib.stringquery"
local sq = SQ.sq
-- Equality assertion with a readable failure message.
-- Raises '[a] ~= [b]' (both values passed through tostring) when the
-- two arguments differ; level 2 attributes the error to the caller.
local function asserteq(a,b)
  if a == b then return end
  local msg = string.format('[%s] ~= [%s]', tostring(a), tostring(b))
  error(msg, 2)
end
-- All checks go through asserteq so that a failure reports the actual
-- and expected values instead of a bare "assertion failed!".

-- Replace every word with a fixed string.
asserteq(
  sq("this is a test"):match("%w+"):replace('_'),
  '_ _ _ _'
)
-- Linkify bare URLs that lie outside of tags and contain 'user'.
asserteq(
  sq('<p>this is a <a href="/">test</a> http://lua-users.org http://lua.org </p>')
    :match("<[^>]*>")
    :invert()
    :match('http://[^ ]+')
    :filter('user')
    :replace(function(s) return '<a href="' .. s .. '">' .. s .. '</a>' end),
  '<p>this is a <a href="/">test</a> <a href="http://lua-users.org">' ..
  'http://lua-users.org</a> http://lua.org </p>'
)
-- Upper-case only the selections accepted by a combined predicate.
asserteq(
  sq("the red book, the green book, and the blue book")
    :match("%w+ book")
    :filter(SQ.any("^green ", "^red"))
    :replace(string.upper),
  'the RED BOOK, the GREEN BOOK, and the blue book'
)
-- solution to problem from http://lua-users.org/wiki/FrontierPattern
-- (get_unpacked is not the last argument, so only its first result is used)
asserteq(
  sq("the QUICK BROwn fox")
    :match("%w+")
    :filter("^[A-Z]*$")
    :get_unpacked(),
  'QUICK'
)
-- examples in docs
asserteq(
  table.concat(sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get(), ','),
  'A,BETA,G,A' )
asserteq(
  table.concat(sq'this is a test':match'%w+':filter'^....$':get(), ','),
  'this,test' )
asserteq(
  table.concat(sq'123 abc 234':match'%a+':invert():get(), ','),
  '123 , 234' )
asserteq(
  table.concat({sq'this is a test':match'%w+':get_unpacked()}, ','),
  'this,is,a,test' )
asserteq(
  table.concat(sq'hello':get(), ','),
  'hello' )
asserteq(
  SQ.any('%a%d', '%d%a')(' a1 '), true )
asserteq(
  SQ.all('%a%d', '%d%a')(' a1 2b '), true )
print 'DONE'

dmlib/stringquery.lua

-- dmlib/stringquery.lua (dmlib.stringquery)
--
-- String matching/replacing library inspired partly by jquery
--
-- Warning: preliminary design.
--
-- (c) 2009 David Manura, Licensed under the same terms as Lua (MIT license).
local M = {}
-- Overwrites the array part of dst with the array part of src:
-- entries dst[1], dst[2], ... are cleared up to the first nil, then
-- src[1], src[2], ... are copied in up to the first nil.
-- Non-array keys of dst are left untouched. Returns dst.
local function tioverride(dst, src)
  local n = 1
  while dst[n] ~= nil do
    dst[n] = nil
    n = n + 1
  end
  n = 1
  local value = src[1]
  while value ~= nil do
    dst[n] = value
    n = n + 1
    value = src[n]
  end
  return dst
end
-- Returns array of substrings in s, partitioned
-- by array of ranges (1-based start and end indices).
-- Always returns an odd number of substrings (even-indexed
-- substrings are inside the ranges, odd-indexed ones are the text
-- before, between, and after them).
-- Assumes ranges are in ascending, non-overlapping order, since the
-- cursor only moves forward from one range to the next.
-- Example:
-- partition("abcdefg", {{1,2},{4,5}})
-- --> {'','ab', 'c','de', 'fg'}
local function partition(s, ranges)
  local result = {}
  local i = 1  -- first position of s not yet emitted
  for _, range in ipairs(ranges) do
    -- Index directly instead of calling the global `unpack`, which only
    -- exists in Lua 5.1/LuaJIT (it moved to table.unpack in 5.2).
    local ia, ib = range[1], range[2]
    result[#result + 1] = s:sub(i, ia - 1)
    result[#result + 1] = s:sub(ia, ib)
    i = ib + 1
  end
  result[#result + 1] = s:sub(i)
  return result
end
-- Helper: normalizes a predicate argument.
-- A string is treated as a Lua pattern and wrapped in a function that
-- returns the result of s:match(pattern); any other value (normally a
-- function) is returned unchanged.
local function getarg(o)
  if type(o) ~= 'string' then
    return o
  end
  return function(s) return s:match(o) end
end
-- Metatable for query objects. It is its own __index, so functions
-- stored on mt are found as methods on any table whose metatable is mt.
local mt = {}
mt.__index = mt
-- Defines new selections based on matches of
-- pattern inside current selections.
-- Each current selection is searched independently, so a match never
-- spans two selections. Matches are collected left to right.
-- A pattern that matches the empty string yields an empty selection
-- at that position and the search then advances by one character, so
-- the scan always terminates.
-- Example:
-- sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get()
-- --> {'A', 'BETA', 'G', 'A'}
function mt:match(pat)
  local results = {}
  for _, range in ipairs(self) do
    -- Direct indexing avoids the Lua 5.1-only global `unpack`.
    local ia0, ib0 = range[1], range[2]
    local stmp = self.s:sub(ia0, ib0)
    local offset = ia0 - 1  -- converts positions in stmp to positions in self.s
    local init = 1
    -- init == #stmp + 1 is still searched so an empty match at the very
    -- end can be found; beyond that there is nothing left to scan.
    while init <= #stmp + 1 do
      local ia, ib = stmp:find(pat, init)
      if not ia then break end
      results[#results + 1] = {ia + offset, ib + offset}
      if ib >= ia then
        init = ib + 1
      else
        -- Empty match (ib == ia - 1): step forward explicitly, otherwise
        -- the next search would start at the same position forever.
        init = ia + 1
      end
    end
  end
  tioverride(self, results)
  return self
end
-- Defines new selections based only on current selections
-- that match object o. o can be a function (s -> b)
-- that returns a Boolean b telling whether string s matches.
-- Alternately o can be a string pattern, in which case a selection
-- is kept when the pattern matches somewhere inside it.
-- Example:
-- sq'this is a test':match'%w+':filter'^....$':get()
-- --> {'this', 'test'}
function mt:filter(o)
  local f = getarg(o)
  local result = {}
  for _, range in ipairs(self) do
    -- Direct indexing avoids the global `unpack`, which exists only in
    -- Lua 5.1/LuaJIT.
    local ia, ib = range[1], range[2]
    if f(self.s:sub(ia, ib)) then
      result[#result + 1] = {ia, ib}
    end
  end
  tioverride(self, result)
  return self
end
-- Defines new selections that form the inverse (complement)
-- of the current selections: every non-empty gap before, between,
-- and after the current ranges becomes a selection.
-- Note: inverting twice restores the original selections only up to
-- merging of adjacent ranges and dropping of empty ones.
-- Example:
-- sq'123 abc 234':match'%a+':invert():get()
-- --> {'123 ', ' 234'}
function mt:invert()
  local result = {}
  local i = 1  -- first position not covered by the ranges seen so far
  for _, range in ipairs(self) do
    -- Direct indexing avoids the Lua 5.1-only global `unpack`.
    local ia, ib = range[1], range[2]
    if ia > i then
      result[#result + 1] = {i, ia - 1}
    end
    i = ib + 1
  end
  -- `<=` rather than `<`: a trailing gap of exactly one character
  -- (i == #self.s) must be selected too.
  if i <= #self.s then
    result[#result + 1] = {i, #self.s}
  end
  tioverride(self, result)
  return self
end
-- Builds and returns a new string in which every selection has been
-- substituted; the query object itself is not modified.
-- o is either a function (s1 -> s2) giving the replacement s2 for the
-- selected text s1, or a string used as the replacement for every
-- selection. A false/nil result from the function is treated as ''.
function mt:replace(o)
  local f = o
  if type(o) == 'string' then
    f = function(s) return o end
  end
  -- Even-indexed pieces are the selected substrings.
  local pieces = partition(self.s, self)
  local idx = 2
  while idx <= #pieces do
    pieces[idx] = f(pieces[idx]) or ''
    idx = idx + 2
  end
  return table.concat(pieces, '')
end
-- Collects the text of every selection, in order, into a new array.
-- Example:
-- sq'this is a test':match'%w+':get()
-- --> {'this', 'is', 'a', 'test'}
function mt:get()
  local selected = {}
  -- partition() puts the selected substrings at the even indices.
  local pieces = partition(self.s, self)
  for idx = 2, #pieces, 2 do
    selected[#selected + 1] = pieces[idx]
  end
  return selected
end
-- Returns all string selections as unpacked list
-- (multiple return values rather than one array).
-- Example:
-- sq'this is a test':match'%w+':get_unpacked()
-- --> 'this', 'is', 'a', 'test'
function mt:get_unpacked()
  -- The global `unpack` exists only in Lua 5.1/LuaJIT; Lua 5.2+ provides
  -- table.unpack instead, so use whichever is available.
  local unpack_fn = table.unpack or unpack
  return unpack_fn(self:get())
end
-- Debugging aid: prints a dump of this query object and returns the
-- object so the call can sit in the middle of a method chain.
-- Requires penlight 0.6.3 (pl.pretty), loaded on demand.
function mt:print_dump()
  local pretty = require "pl.pretty"
  print(pretty.write(self))
  return self
end
-- Creates a string query object for string s.
-- The object stores s in field `s` and starts with a single selection
-- spanning the entire string.
-- Example:
-- sq'hello':get() --> {'hello'}
local function sq(s)
  local query = { s = s, [1] = {1, #s} }
  return setmetatable(query, mt)
end
M.sq = sq
-- Builds a predicate that succeeds when *any* of the given
-- predicates succeeds. Each argument may be a predicate function or
-- a string pattern (converted with getarg).
-- The result is always exactly true or false; with no arguments
-- the returned predicate is always false.
-- Useful for sq(s):filter.
-- Example:
-- any('%a%d', '%d%a')(' a1 ') --> true
local function any(...)
  local preds = {...}
  for idx, p in ipairs(preds) do
    preds[idx] = getarg(p)
  end
  return function(s)
    local idx = 1
    local pred = preds[1]
    while pred ~= nil do
      if pred(s) then
        return true
      end
      idx = idx + 1
      pred = preds[idx]
    end
    return false
  end
end
M.any = any
-- Builds a predicate that succeeds only when *all* of the given
-- predicates succeed. Each argument may be a predicate function or
-- a string pattern (converted with getarg).
-- The result is always exactly true or false; with no arguments
-- the returned predicate is always true.
-- Useful for sq(s):filter.
-- Example:
-- all('%a%d', '%d%a')(' a1 2b ') --> true
local function all(...)
  local preds = {...}
  for idx, p in ipairs(preds) do
    preds[idx] = getarg(p)
  end
  return function(s)
    local idx = 1
    local pred = preds[1]
    while pred ~= nil do
      if not pred(s) then
        return false
      end
      idx = idx + 1
      pred = preds[idx]
    end
    return true
  end
end
M.all = all
return M

See Also


RecentChanges · preferences
edit · history
Last edited October 31, 2009 2:46 pm GMT (diff)

AltStyle によって変換されたページ (->オリジナル) /