lua-users home
lua-l archive

Load large amount of data fast

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Hi, list!
Apologies for a lazy question, I have not done my own homework.
I've got a large file (3M entries, 250 MB) with data.
Each entry is one line with a small Lua table:
{ foo = 1; bar = 2; baz = 'text' };
(Actually, there are two different entry formats, but that does not matter.)
I need to load this data fast enough. (Faster than the several hours my
original loader has been running on LJ2; it still has not finished.)
So, if you know a faster implementation than the ad-hoc unoptimized one
below, please share.
Alexander.
 --- Load a file holding one Lua table constructor per line into an array.
 --
 -- Every non-blank line is expected to look like
 -- `{ foo = 1; bar = 2; baz = 'text' };`. The file is never read into
 -- memory as a whole: it is streamed through `load` as generated source
 -- in which each line is prefixed with `A_`, turning it into a call
 -- `A_{ ... };` that appends the table to the result. Lines are grouped
 -- into immediately-invoked anonymous functions of at most
 -- MAX_LINES_IN_CHUNK entries each (presumably to stay below the VM's
 -- per-function limits -- TODO confirm the limit that motivated this).
 --
 -- Blank and whitespace-only lines are skipped.
 --
 -- Raises an error if the file cannot be opened, if the generated source
 -- does not compile, or if running it fails.
 --
 -- @tparam string filename path of the data file
 -- @treturn table array of the loaded tables, in file order
 local load_huge_table_list = function(filename)
   local result = { }
   local env =
   {
     -- Invoked once per entry by the generated code.
     A_ = function(t) result[#result + 1] = t end;
   }

   local f = assert(io.open(filename, "r"))

   local MAX_LINES_IN_CHUNK = 1024
   local lines_in_function = 0 -- Data lines emitted into the open wrapper
   local function_open = false

   -- Pieces of generated source waiting to be handed to `load`.
   -- Pieces are returned one by one rather than concatenated, so that no
   -- extra copy of each data line is created. Starts with the file header.
   local queue = { "local A_=A_;\n" }
   local q_first, q_last = 1, 1

   local push = function(piece)
     q_last = q_last + 1
     queue[q_last] = piece
   end

   local pop = function()
     local piece = queue[q_first]
     queue[q_first] = nil
     q_first = q_first + 1
     if q_first > q_last then
       q_first, q_last = 1, 0 -- Queue drained, reuse the low slots
     end
     return piece
   end

   -- Debug aid: wrap the reader with this to echo every piece it yields.
   -- local w = function(f) return function() local s = f()
   --   io.write(s or "") return s end end

   local reader = function()
     if q_first <= q_last then
       return pop()
     end

     if not f then
       return nil
     end

     -- Skip blank lines: an empty string returned from a reader tells
     -- `load` that the chunk has ended, and a whitespace-only line would
     -- leave a dangling `A_` in the generated source.
     local line
     repeat
       line = f:read("*l") -- TODO: Read larger chunks?
     until line == nil or line:find("%S")

     if line == nil then
       f:close()
       f = nil
       if function_open then
         function_open = false
         return "end)();\n"
       end
       return nil -- EOF
     end

     if not function_open then
       function_open = true
       lines_in_function = 0
       push("(function()\n")
     elseif lines_in_function >= MAX_LINES_IN_CHUNK then
       -- The current wrapper is full: close it and start a new one.
       lines_in_function = 0
       push("end)();\n(function()\n")
     end

     lines_in_function = lines_in_function + 1
     push("A_")
     push(line)
     push("\n")

     return pop()
   end

   -- Lua 5.2+ and LuaJIT take the environment from the fourth argument;
   -- Lua 5.1 ignores the extra arguments and relies on setfenv below.
   local chunk, err = load(reader, "="..filename, "t", env)

   if not chunk then
     -- Do not leak the handle when compilation stops mid-file.
     if f then
       f:close()
       f = nil
     end
     error(err, 0)
   end

   assert(not f, "data file was not read to the end")

   if setfenv then
     setfenv(chunk, env)
   end

   chunk()

   return result
 end

AltStyle によって変換されたページ (->オリジナル) /