gdt-parse-csv.lua - gsl-shell.git - gsl-shell

index : gsl-shell.git
gsl-shell
summary refs log tree commit diff
path: root/gdt-parse-csv.lua
blob: 6c71288b9f2f0ccef1edc17e4b58a982d130b972 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
local ffi = require 'ffi'
local gdt = require 'gdt'
local csv = require 'csv'
local max = math.max
local match, gsub = string.match, string.gsub
-- Tell whether no element of the list "ls" can be converted to a number.
-- Returns true when tonumber() fails on every element visited by ipairs
-- (including the case of an empty list), false otherwise.
local function is_string_only(ls)
	local only_text = true
	for _, item in ipairs(ls) do
		if tonumber(item) ~= nil then
			-- One numeric-looking entry is enough to decide.
			only_text = false
			break
		end
	end
	return only_text
end
-- First pass over a row source: measure the data and guess whether the
-- first row is a header.
--
-- "source" is an iterator function returning one row (a list of values)
-- per call and nil when exhausted. Scanning stops at the first row of
-- length zero.
--
-- Returns three values: the number of data rows, the largest row length
-- seen, and a boolean telling whether the first row is treated as a header.
-- When a header is detected it is not counted among the data rows.
local function pre_parse_csv(source)
	local first_row = source()
	local row_count = 1
	local col_count = #first_row
	local body_all_text = true
	-- repeated[k] is set once some later row carries the same value as the
	-- first row in column k; repeated_count is the number of such columns.
	local repeated = {}
	local repeated_count = 0
	for row in source do
		local width = #row
		if width == 0 then break end
		if body_all_text then
			body_all_text = is_string_only(row)
		end
		for k = 1, width do
			if first_row[k] == row[k] and not repeated[k] then
				repeated[k] = true
				repeated_count = repeated_count + 1
			end
		end
		if width > col_count then col_count = width end
		row_count = row_count + 1
	end
	-- The first row is a header only if it holds no numeric value and
	-- either fewer than half of the columns repeat one of its values
	-- further down, or the remaining rows contain at least one number.
	local has_header = false
	if is_string_only(first_row) then
		has_header = (repeated_count < col_count / 2) or (not body_all_text)
	end
	if has_header then
		row_count = row_count - 1
	end
	return row_count, col_count, has_header
end
-- Return true unless "s" is empty or made only of whitespace characters.
local function is_not_empty(s)
	local blank = match(s, '^%s*$')
	return blank == nil
end
-- Build a gdt table from a restartable row source.
--
-- "source_init" is a function that, each time it is called, returns a row
-- iterator positioned at the beginning of the data. It is called twice:
-- once for the sizing pass done by pre_parse_csv and once to fill the table.
--
-- Returns the object obtained from gdt.alloc after it has been filled.
local function gdt_parse(source_init)
	local nrows, ncols, has_header = pre_parse_csv(source_init())
	local t = gdt.alloc(nrows, ncols)

	-- Restart from the beginning for the filling pass.
	local next_row = source_init()
	if has_header then
		local names = next_row()
		for col, name in ipairs(names) do
			t:set_header(col, name)
		end
	end

	local row_index = 0
	for cells in next_row do
		-- A row of length zero ends the data, as in the sizing pass.
		if #cells == 0 then break end
		row_index = row_index + 1
		for col = 1, ncols do
			local value = cells[col]
			-- Missing or blank cells are stored as nil.
			if not value or not is_not_empty(value) then
				value = nil
			end
			gdt.set(t, row_index, col, value)
		end
	end
	return t
end
-- Strip leading and trailing whitespace, in place, from every string
-- element of the list "line". Non-string elements are left untouched.
local function trim_spaces(line)
	for pos = 1, #line do
		local cell = line[pos]
		if type(cell) == 'string' then
			-- The extra parentheses discard the substitution count that
			-- gsub returns as its second value.
			line[pos] = (gsub((gsub(cell, "^%s+", "")), "%s+$", ""))
		end
	end
end
-- Create a restartable row source reading the CSV file "filename".
--
-- "options" is an optional table; when its field "strip_spaces" is not nil
-- it overrides the default (true) and controls whether trim_spaces is
-- applied to each parsed row.
--
-- Returns an initializer function. Each call to the initializer (re)opens
-- the file and returns the row iterator; the iterator returns the result of
-- csv.line for the next line of the file, or nil at end of file.
--
-- The initializer raises an error if the file cannot be opened.
local function source_csv(filename, options)
	local strip_spaces = true
	if options and (options.strip_spaces ~= nil) then
		strip_spaces = options.strip_spaces
	end
	local f
	local it, s, i

	-- Close the current handle, if any, and forget it so that it is never
	-- closed twice nor read after being closed.
	local function close_file()
		if f then
			f:close()
			f = nil
		end
	end

	local source = function()
		-- Already exhausted (or never opened): keep reporting the end of
		-- the data instead of reading from a closed file.
		if not f then return nil end
		local line = it(s, i)
		if line then
			local ls = csv.line(line)
			if strip_spaces then trim_spaces(ls) end
			return ls
		end
		close_file()
	end

	return function()
		-- A consumer may stop before end of file (e.g. on an empty row), in
		-- which case the previous handle is still open: release it before
		-- opening a new one.
		close_file()
		f = assert(io.open(filename, 'r'), 'cannot open file: ' .. filename)
		it, s, i = f:lines()
		return source
	end
end
-- Create a restartable row source over the in-memory list "def".
--
-- Returns an initializer function. Each call to the initializer rewinds the
-- position and returns the row iterator, which yields def[1], def[2], ...
-- up to the length of "def" measured when source_def was called, then nil.
local function source_def(def)
	local count = #def
	local pos = 0

	local function next_row()
		local following = pos + 1
		if following <= count then
			pos = following
			return def[following]
		end
	end

	return function()
		pos = 0
		return next_row
	end
end
-- Convert a single value to what is written in a CSV cell.
--
-- Numbers are returned unchanged. A string starting with a letter and
-- followed by one or more letters, digits or underscores is returned as is;
-- any other string is wrapped in double quotes with embedded double quotes
-- doubled. Values of any other type give the empty string.
local function csv_format(x)
	local kind = type(x)
	if kind == 'number' then
		return x
	end
	if kind ~= 'string' then
		return ""
	end
	if match(x, "^%a[%w_]+$") then
		return x
	end
	-- The parentheses drop the replacement count returned by gsub.
	local escaped = (x:gsub('"', '""'))
	return string.format("\"%s\"", escaped)
end
-- Write one CSV line to "f" made of the first "nc" entries of "row".
-- Each entry goes through csv_format, entries are joined with commas and
-- the line is terminated by a newline.
local function write_csv_row(f, row, nc)
	local cells = {}
	for col = 1, nc do
		cells[col] = csv_format(row[col])
	end
	local joined = table.concat(cells, ",")
	f:write(string.format("%s\n", joined))
end
-- Write the table "t" to the file "filename" in CSV format.
--
-- The first line holds the values returned by t:headers(); one line follows
-- for each row yielded by t:rows(), with the cells taken from the row by
-- header name and listed in header order.
--
-- Raises an error if the file cannot be opened for writing.
function gdt.write_csv(t, filename)
	local out = assert(io.open(filename, "w"))
	local names = t:headers()
	local width = #names
	write_csv_row(out, names, width)
	for _, record in t:rows() do
		-- Reorder the record's fields to follow the header order.
		local cells = {}
		for col, name in ipairs(names) do
			cells[col] = record[name]
		end
		write_csv_row(out, cells, width)
	end
	out:close()
end
-- Read the CSV file "filename" and return the table built by gdt_parse.
-- "options" is passed unchanged to source_csv.
function gdt.read_csv(filename, options)
	local source_init = source_csv(filename, options)
	return gdt_parse(source_init)
end
-- Build a table from "def", an in-memory list of rows, and return the
-- result of gdt_parse.
function gdt.def(def)
	local source_init = source_def(def)
	return gdt_parse(source_init)
end
generated by cgit v1.2.3 (git 2.25.1) at 2025年09月11日 11:32:59 +0000

AltStyle によって変換されたページ (->オリジナル) /