Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3cb6222

Browse files
authored
add fixed-length reader.Read() implementation (#118)
1 parent bf59862 commit 3cb6222

File tree

2 files changed

+180
-14
lines changed

2 files changed

+180
-14
lines changed

‎extensions/omniv21/fileformat/fixedlength/reader.go‎

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,41 @@ func (r *reader) readByHeaderFooterEnvelope() (*idr.Node, error) {
135135
}
136136
}
137137

138+
func (r *reader) Read() (node *idr.Node, err error) {
139+
if r.target != nil {
140+
// This is just in case Release() isn't called by ingester.
141+
idr.RemoveAndReleaseTree(r.target)
142+
r.target = nil
143+
}
144+
readEnvelope:
145+
if r.decl.envelopeType() == envelopeTypeByRows {
146+
node, err = r.readByRowsEnvelope()
147+
if err != nil {
148+
return nil, err
149+
}
150+
idr.AddChild(r.root, node)
151+
} else {
152+
node, err = r.readByHeaderFooterEnvelope()
153+
if err != nil {
154+
return nil, err
155+
}
156+
idr.AddChild(r.root, node)
157+
if r.decl.Envelopes[r.envelopeIndex].NotTarget {
158+
// If this by_header_footer envelope isn't target envelope then we consider it
159+
// a global envelope and keep it in the idr tree.
160+
goto readEnvelope
161+
}
162+
}
163+
// now the envelope is the target envelope, let's do a target xpath filtering.
164+
// if it filters out, then we need to remove it from the idr tree.
165+
if r.xpath != nil && !idr.MatchAny(node, r.xpath) {
166+
idr.RemoveAndReleaseTree(node)
167+
goto readEnvelope
168+
}
169+
r.target = node
170+
return node, err
171+
}
172+
138173
// fmtErrStr renders format/args into a message and prefixes it with the input
// name and the reader's current line number.
func (r *reader) fmtErrStr(format string, args ...interface{}) string {
	msg := fmt.Sprintf(format, args...)
	return fmt.Sprintf("input '%s' line %d: %s", r.inputName, r.line, msg)
}

‎extensions/omniv21/fileformat/fixedlength/reader_test.go‎

Lines changed: 145 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"strings"
88
"testing"
99

10+
"github.com/antchfx/xpath"
11+
"github.com/jf-tech/go-corelib/caches"
1012
"github.com/jf-tech/go-corelib/strs"
1113
"github.com/jf-tech/go-corelib/testlib"
1214
"github.com/stretchr/testify/assert"
@@ -20,17 +22,30 @@ func TestIsErrInvalidEnvelope(t *testing.T) {
2022
assert.False(t, IsErrInvalidEnvelope(errors.New("test")))
2123
}
2224

23-
func testReader(r io.Reader, decl *fileDecl) *reader {
25+
func testReader(tb testing.TB, r io.Reader, decl *fileDecl) *reader {
26+
return testReader2(tb, r, decl, "")
27+
}
28+
29+
func testReader2(tb testing.TB, r io.Reader, decl *fileDecl, xpathStr string) *reader {
2430
return &reader{
2531
inputName: "test",
2632
r: bufio.NewReader(r),
2733
decl: decl,
28-
line: 1,
34+
xpath: func() *xpath.Expr {
35+
if xpathStr == "" {
36+
return nil
37+
}
38+
xpathExpr, err := caches.GetXPathExpr(xpathStr)
39+
assert.NoError(tb, err)
40+
return xpathExpr
41+
}(),
42+
root: idr.CreateNode(idr.DocumentNode, "#root"),
43+
line: 1,
2944
}
3045
}
3146

3247
func TestReadLine(t *testing.T) {
33-
r := testReader(strings.NewReader("abc\n\nefg\n \nxyz\n"), nil)
48+
r := testReader(t, strings.NewReader("abc\n\nefg\n \nxyz\n"), nil)
3449
assert.Equal(t, 1, r.line)
3550

3651
line, err := r.readLine()
@@ -58,15 +73,17 @@ func TestReadLine(t *testing.T) {
5873
// io.EOF shouldn't bump up current line number.
5974
line, err = r.readLine()
6075
assert.Equal(t, io.EOF, err)
76+
assert.Nil(t, line)
6177
assert.Equal(t, 6, r.line)
6278

6379
// reading again should still return io.EOF and line number stays.
6480
line, err = r.readLine()
6581
assert.Equal(t, io.EOF, err)
82+
assert.Nil(t, line)
6683
assert.Equal(t, 6, r.line)
6784

6885
// Another scenario that io.Reader fails
69-
r = testReader(testlib.NewMockReadCloser("read error", nil), nil)
86+
r = testReader(t, testlib.NewMockReadCloser("read error", nil), nil)
7087
assert.Equal(t, 1, r.line)
7188
line, err = r.readLine()
7289
assert.Error(t, err)
@@ -77,7 +94,7 @@ func TestReadLine(t *testing.T) {
7794

7895
func TestReadByRowsEnvelope_ByRowsDefault(t *testing.T) {
7996
// default by_rows = 1
80-
r := testReader(strings.NewReader("abc\n\nefghijklmn\n \nxyz\n"),
97+
r := testReader(t, strings.NewReader("abc\n\nefghijklmn\n \nxyz\n"),
8198
&fileDecl{Envelopes: []*envelopeDecl{{
8299
Name: strs.StrPtr("env1"),
83100
Columns: []*columnDecl{
@@ -115,7 +132,7 @@ func TestReadByRowsEnvelope_ByRowsDefault(t *testing.T) {
115132
}
116133

117134
func TestReadByRowsEnvelope_ByRowsNonDefault(t *testing.T) {
118-
r := testReader(strings.NewReader("abcdefg\n\nhijklmn\n \nabc012345\n"),
135+
r := testReader(t, strings.NewReader("abcdefg\n\nhijklmn\n \nabc012345\n"),
119136
&fileDecl{Envelopes: []*envelopeDecl{{
120137
Name: strs.StrPtr("env1"),
121138
ByRows: testlib.IntPtr(3),
@@ -158,7 +175,7 @@ var (
158175
// BenchmarkReadByRowsEnvelope-8 624 1891740 ns/op 133140 B/op 9005 allocs/op
159176
func BenchmarkReadByRowsEnvelope(b *testing.B) {
160177
for i := 0; i < b.N; i++ {
161-
r := testReader(strings.NewReader(benchReadByRowsEnvelopeInput), benchReadByRowsEnvelopeDecl)
178+
r := testReader(b, strings.NewReader(benchReadByRowsEnvelopeInput), benchReadByRowsEnvelopeDecl)
162179
for {
163180
n, err := r.readByRowsEnvelope()
164181
if err != nil {
@@ -173,14 +190,14 @@ func BenchmarkReadByRowsEnvelope(b *testing.B) {
173190
}
174191

175192
func TestReadByHeaderFooterEnvelope_EOFBeforeStart(t *testing.T) {
176-
r := testReader(strings.NewReader(""), &fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
193+
r := testReader(t, strings.NewReader(""), &fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
177194
n, err := r.readByHeaderFooterEnvelope()
178195
assert.Equal(t, io.EOF, err)
179196
assert.Nil(t, n)
180197
}
181198

182199
func TestReadByHeaderFooterEnvelope_ReadErrorBeforeStart(t *testing.T) {
183-
r := testReader(
200+
r := testReader(t,
184201
testlib.NewMockReadCloser("read error", nil),
185202
&fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
186203
n, err := r.readByHeaderFooterEnvelope()
@@ -191,7 +208,7 @@ func TestReadByHeaderFooterEnvelope_ReadErrorBeforeStart(t *testing.T) {
191208
}
192209

193210
func TestReadByHeaderFooterEnvelope_NoEnvelopeMatch(t *testing.T) {
194-
r := testReader(
211+
r := testReader(t,
195212
strings.NewReader("efg"),
196213
&fileDecl{Envelopes: []*envelopeDecl{{
197214
Name: strs.StrPtr("env1"),
@@ -203,7 +220,7 @@ func TestReadByHeaderFooterEnvelope_NoEnvelopeMatch(t *testing.T) {
203220
}
204221

205222
func TestReadByHeaderFooterEnvelope_IncompleteEnvelope(t *testing.T) {
206-
r := testReader(
223+
r := testReader(t,
207224
strings.NewReader("abc"),
208225
&fileDecl{Envelopes: []*envelopeDecl{{
209226
Name: strs.StrPtr("env1"),
@@ -218,8 +235,8 @@ func TestReadByHeaderFooterEnvelope_IncompleteEnvelope(t *testing.T) {
218235

219236
// lf returns s with a single line-feed character appended.
func lf(s string) string {
	const newline = "\n"
	return s + newline
}
220237

221-
func TestReadByHeaderFooterEnvelope_Success1(t *testing.T) {
222-
r := testReader(
238+
func TestReadByHeaderFooterEnvelope_Success(t *testing.T) {
239+
r := testReader(t,
223240
strings.NewReader(
224241
lf("begin")+
225242
lf("header-01")+
@@ -311,7 +328,7 @@ var (
311328
// BenchmarkReadByHeaderFooterEnvelope-8 310 3819649 ns/op 213840 B/op 14009 allocs/op
312329
func BenchmarkReadByHeaderFooterEnvelope(b *testing.B) {
313330
for i := 0; i < b.N; i++ {
314-
r := testReader(strings.NewReader(benchReadByHeaderFooterEnvelopeInput), benchReadByHeaderFooterEnvelopeDecl)
331+
r := testReader(b, strings.NewReader(benchReadByHeaderFooterEnvelopeInput), benchReadByHeaderFooterEnvelopeDecl)
315332
for {
316333
n, err := r.readByHeaderFooterEnvelope()
317334
if err != nil {
@@ -324,3 +341,117 @@ func BenchmarkReadByHeaderFooterEnvelope(b *testing.B) {
324341
}
325342
}
326343
}
344+
345+
func TestRead_ByRows(t *testing.T) {
346+
r := testReader2(t,
347+
strings.NewReader(
348+
// data block 1
349+
lf("a001-abc")+
350+
lf("a002-def")+
351+
lf("a003-ghi")+
352+
// data block 2
353+
lf("a001-!@#")+
354+
lf("a002-$%^")+
355+
lf("a003-&*(")+
356+
// data block 3
357+
lf("a001-012")+
358+
lf("a002-345")+
359+
lf("a003-678")),
360+
&fileDecl{Envelopes: []*envelopeDecl{
361+
{
362+
Name: strs.StrPtr("data"),
363+
ByRows: testlib.IntPtr(3),
364+
Columns: []*columnDecl{
365+
{Name: "a001_first2chars", StartPos: 6, Length: 2, LinePattern: strs.StrPtr("^a001")},
366+
{Name: "a003_last2chars", StartPos: 7, Length: 2, LinePattern: strs.StrPtr("^a003")},
367+
{Name: "a001_last1char", StartPos: 8, Length: 1, LinePattern: strs.StrPtr("^a001")},
368+
},
369+
},
370+
}},
371+
".[not(contains(a001_first2chars, '!'))]")
372+
n, err := r.Read()
373+
assert.NoError(t, err)
374+
assert.Equal(t,
375+
`{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}`, idr.JSONify2(n))
376+
assert.Equal(t,
377+
`{"data":{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}}`, idr.JSONify2(r.root))
378+
379+
n, err = r.Read()
380+
assert.NoError(t, err)
381+
assert.Equal(t,
382+
`{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}`, idr.JSONify2(n))
383+
assert.Equal(t,
384+
`{"data":{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}}`, idr.JSONify2(r.root))
385+
386+
n, err = r.Read()
387+
assert.Equal(t, io.EOF, err)
388+
assert.Nil(t, n)
389+
}
390+
391+
func TestRead_ByHeaderFooter(t *testing.T) {
392+
r := testReader2(t,
393+
strings.NewReader(
394+
// global header
395+
lf("begin")+
396+
// data block 1
397+
lf("header-01")+
398+
lf("a001-abc")+
399+
lf("a002-def")+
400+
lf("a003-ghi")+
401+
lf("footer")+
402+
// data block 2
403+
lf("header-02")+
404+
lf("a001-!@#")+
405+
lf("a002-$%^")+
406+
lf("a003-&*(")+
407+
lf("footer")+
408+
// data block 3
409+
lf("header-03")+
410+
lf("a001-012")+
411+
lf("a002-345")+
412+
lf("a003-678")+
413+
lf("footer")+
414+
// global footer
415+
lf("end")),
416+
&fileDecl{Envelopes: []*envelopeDecl{
417+
{
418+
Name: strs.StrPtr("begin"),
419+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^begin", Footer: "^begin"},
420+
NotTarget: true,
421+
},
422+
{
423+
Name: strs.StrPtr("data"),
424+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^header", Footer: "^footer"},
425+
Columns: []*columnDecl{
426+
{Name: "a001_first2chars", StartPos: 6, Length: 2, LinePattern: strs.StrPtr("^a001")},
427+
{Name: "a003_last2chars", StartPos: 7, Length: 2, LinePattern: strs.StrPtr("^a003")},
428+
{Name: "a001_last1char", StartPos: 8, Length: 1, LinePattern: strs.StrPtr("^a001")},
429+
},
430+
},
431+
{
432+
Name: strs.StrPtr("end"),
433+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^end", Footer: "^end"},
434+
NotTarget: true,
435+
},
436+
}},
437+
".[not(contains(a001_first2chars, '!'))]")
438+
n, err := r.Read()
439+
assert.NoError(t, err)
440+
assert.Equal(t,
441+
`{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}`, idr.JSONify2(n))
442+
assert.Equal(t,
443+
`{"begin":{},"data":{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}}`,
444+
idr.JSONify2(r.root))
445+
446+
n, err = r.Read()
447+
assert.NoError(t, err)
448+
assert.Equal(t,
449+
`{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}`, idr.JSONify2(n))
450+
assert.Equal(t,
451+
`{"begin":{},"data":{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}}`,
452+
idr.JSONify2(r.root))
453+
454+
n, err = r.Read()
455+
assert.Equal(t, io.EOF, err)
456+
assert.Nil(t, n)
457+
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /