Skip to content

Commit ba93495

Browse files
authored
Adding flatfile.csv.reader implementation and tests; fix a bug in flatfile.fixedlength where column match goes wrong. (#178)
The bug is in `flatfile.fixedlength.reader.linesToNode`:
```
func (r *reader) linesToNode(decl *EnvelopeDecl, n int) *idr.Node {
	if len(r.linesBuf) < n {
		panic(
			fmt.Sprintf("linesBuf has %d lines but requested %d lines to convert", len(r.linesBuf), n))
	}
	node := idr.CreateNode(idr.ElementNode, decl.Name)
	for col := range decl.Columns {
		colDecl := decl.Columns[col]
		for i := 0; i < n; i++ {
			if !colDecl.lineMatch(i, r.linesBuf[i].b) {
				continue
			}
			colNode := idr.CreateNode(idr.ElementNode, colDecl.Name)
			idr.AddChild(node, colNode)
			colVal := idr.CreateNode(idr.TextNode, colDecl.lineToColumnValue(r.linesBuf[i].b))
			idr.AddChild(colNode, colVal)
			break // <==== PREVIOUSLY MISSED.
		}
	}
	return node
}
```
The v1.0.3 release missed the crucial `break` statement. As a result, for any multiple-line envelope (whether it's rows-based or header/footer-based), if multiple lines match a column, the last one always wins. This contradicts our existing expectation/specification: the first line that matches should win. It's fixed now, and the tests have been amended to catch the issue.
1 parent 77370e7 commit ba93495

14 files changed

+1042
-36
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"Children": [
3+
{
4+
"Children": [
5+
{
6+
"Children": null,
7+
"Data": ":?>",
8+
"FirstChild": null,
9+
"FormatSpecific": null,
10+
"LastChild": null,
11+
"NextSibling": null,
12+
"Parent": "(ElementNode c1)",
13+
"PrevSibling": null,
14+
"Type": "TextNode"
15+
}
16+
],
17+
"Data": "c1",
18+
"FirstChild": "(TextNode ':?>')",
19+
"FormatSpecific": null,
20+
"LastChild": "(TextNode ':?>')",
21+
"NextSibling": "(ElementNode c2)",
22+
"Parent": "(ElementNode test)",
23+
"PrevSibling": null,
24+
"Type": "ElementNode"
25+
},
26+
{
27+
"Children": [
28+
{
29+
"Children": null,
30+
"Data": "a",
31+
"FirstChild": null,
32+
"FormatSpecific": null,
33+
"LastChild": null,
34+
"NextSibling": null,
35+
"Parent": "(ElementNode c2)",
36+
"PrevSibling": null,
37+
"Type": "TextNode"
38+
}
39+
],
40+
"Data": "c2",
41+
"FirstChild": "(TextNode 'a')",
42+
"FormatSpecific": null,
43+
"LastChild": "(TextNode 'a')",
44+
"NextSibling": "(ElementNode c3)",
45+
"Parent": "(ElementNode test)",
46+
"PrevSibling": "(ElementNode c1)",
47+
"Type": "ElementNode"
48+
},
49+
{
50+
"Children": [
51+
{
52+
"Children": null,
53+
"Data": "2",
54+
"FirstChild": null,
55+
"FormatSpecific": null,
56+
"LastChild": null,
57+
"NextSibling": null,
58+
"Parent": "(ElementNode c3)",
59+
"PrevSibling": null,
60+
"Type": "TextNode"
61+
}
62+
],
63+
"Data": "c3",
64+
"FirstChild": "(TextNode '2')",
65+
"FormatSpecific": null,
66+
"LastChild": "(TextNode '2')",
67+
"NextSibling": null,
68+
"Parent": "(ElementNode test)",
69+
"PrevSibling": "(ElementNode c2)",
70+
"Type": "ElementNode"
71+
}
72+
],
73+
"Data": "test",
74+
"FirstChild": "(ElementNode c1)",
75+
"FormatSpecific": null,
76+
"LastChild": "(ElementNode c3)",
77+
"NextSibling": null,
78+
"Parent": null,
79+
"PrevSibling": null,
80+
"Type": "ElementNode"
81+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"Children": [
3+
{
4+
"Children": [
5+
{
6+
"Children": null,
7+
"Data": "v3",
8+
"FirstChild": null,
9+
"FormatSpecific": null,
10+
"LastChild": null,
11+
"NextSibling": null,
12+
"Parent": "(ElementNode r1c1)",
13+
"PrevSibling": null,
14+
"Type": "TextNode"
15+
}
16+
],
17+
"Data": "r1c1",
18+
"FirstChild": "(TextNode 'v3')",
19+
"FormatSpecific": null,
20+
"LastChild": "(TextNode 'v3')",
21+
"NextSibling": "(ElementNode r1c2)",
22+
"Parent": "(ElementNode r1)",
23+
"PrevSibling": null,
24+
"Type": "ElementNode"
25+
},
26+
{
27+
"Children": [
28+
{
29+
"Children": null,
30+
"Data": "v4",
31+
"FirstChild": null,
32+
"FormatSpecific": null,
33+
"LastChild": null,
34+
"NextSibling": null,
35+
"Parent": "(ElementNode r1c2)",
36+
"PrevSibling": null,
37+
"Type": "TextNode"
38+
}
39+
],
40+
"Data": "r1c2",
41+
"FirstChild": "(TextNode 'v4')",
42+
"FormatSpecific": null,
43+
"LastChild": "(TextNode 'v4')",
44+
"NextSibling": null,
45+
"Parent": "(ElementNode r1)",
46+
"PrevSibling": "(ElementNode r1c1)",
47+
"Type": "ElementNode"
48+
}
49+
],
50+
"Data": "r1",
51+
"FirstChild": "(ElementNode r1c1)",
52+
"FormatSpecific": null,
53+
"LastChild": "(ElementNode r1c2)",
54+
"NextSibling": null,
55+
"Parent": "(ElementNode g1)",
56+
"PrevSibling": null,
57+
"Type": "ElementNode"
58+
},
59+
{
60+
"Children": [
61+
{
62+
"Children": [
63+
{
64+
"Children": null,
65+
"Data": "",
66+
"FirstChild": null,
67+
"FormatSpecific": null,
68+
"LastChild": null,
69+
"NextSibling": null,
70+
"Parent": "(ElementNode r1c1)",
71+
"PrevSibling": null,
72+
"Type": "TextNode"
73+
}
74+
],
75+
"Data": "r1c1",
76+
"FirstChild": "(TextNode '')",
77+
"FormatSpecific": null,
78+
"LastChild": "(TextNode '')",
79+
"NextSibling": null,
80+
"Parent": "(ElementNode r1)",
81+
"PrevSibling": null,
82+
"Type": "ElementNode"
83+
}
84+
],
85+
"Data": "r1",
86+
"FirstChild": "(ElementNode r1c1)",
87+
"FormatSpecific": null,
88+
"LastChild": "(ElementNode r1c1)",
89+
"NextSibling": null,
90+
"Parent": "(ElementNode g1)",
91+
"PrevSibling": null,
92+
"Type": "ElementNode"
93+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"Children": null,
3+
"Data": "r1",
4+
"FirstChild": null,
5+
"FormatSpecific": null,
6+
"LastChild": null,
7+
"NextSibling": null,
8+
"Parent": "(DocumentNode)",
9+
"PrevSibling": null,
10+
"Type": "ElementNode"
11+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"Children": [
3+
{
4+
"Children": [
5+
{
6+
"Children": null,
7+
"Data": "line 1",
8+
"FirstChild": null,
9+
"FormatSpecific": null,
10+
"LastChild": null,
11+
"NextSibling": null,
12+
"Parent": "(ElementNode C)",
13+
"PrevSibling": null,
14+
"Type": "TextNode"
15+
}
16+
],
17+
"Data": "C",
18+
"FirstChild": "(TextNode 'line 1')",
19+
"FormatSpecific": null,
20+
"LastChild": "(TextNode 'line 1')",
21+
"NextSibling": null,
22+
"Parent": "(ElementNode E)",
23+
"PrevSibling": null,
24+
"Type": "ElementNode"
25+
}
26+
],
27+
"Data": "E",
28+
"FirstChild": "(ElementNode C)",
29+
"FormatSpecific": null,
30+
"LastChild": "(ElementNode C)",
31+
"NextSibling": null,
32+
"Parent": null,
33+
"PrevSibling": null,
34+
"Type": "ElementNode"
35+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"Children": [
3+
{
4+
"Children": [
5+
{
6+
"Children": null,
7+
"Data": "one",
8+
"FirstChild": null,
9+
"FormatSpecific": null,
10+
"LastChild": null,
11+
"NextSibling": null,
12+
"Parent": "(ElementNode C1)",
13+
"PrevSibling": null,
14+
"Type": "TextNode"
15+
}
16+
],
17+
"Data": "C1",
18+
"FirstChild": "(TextNode 'one')",
19+
"FormatSpecific": null,
20+
"LastChild": "(TextNode 'one')",
21+
"NextSibling": "(ElementNode C2)",
22+
"Parent": "(ElementNode E)",
23+
"PrevSibling": null,
24+
"Type": "ElementNode"
25+
},
26+
{
27+
"Children": [
28+
{
29+
"Children": null,
30+
"Data": "two",
31+
"FirstChild": null,
32+
"FormatSpecific": null,
33+
"LastChild": null,
34+
"NextSibling": null,
35+
"Parent": "(ElementNode C2)",
36+
"PrevSibling": null,
37+
"Type": "TextNode"
38+
}
39+
],
40+
"Data": "C2",
41+
"FirstChild": "(TextNode 'two')",
42+
"FormatSpecific": null,
43+
"LastChild": "(TextNode 'two')",
44+
"NextSibling": null,
45+
"Parent": "(ElementNode E)",
46+
"PrevSibling": "(ElementNode C1)",
47+
"Type": "ElementNode"
48+
}
49+
],
50+
"Data": "E",
51+
"FirstChild": "(ElementNode C1)",
52+
"FormatSpecific": null,
53+
"LastChild": "(ElementNode C2)",
54+
"NextSibling": null,
55+
"Parent": null,
56+
"PrevSibling": null,
57+
"Type": "ElementNode"
58+
}

extensions/omniv21/fileformat/flatfile/csv/decl.go

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package csv
33
import (
44
"fmt"
55
"regexp"
6+
"strings"
67

78
"github.com/jf-tech/go-corelib/maths"
89

@@ -19,17 +20,17 @@ type ColumnDecl struct {
1920
linePatternRegexp *regexp.Regexp
2021
}
2122

22-
func (c *ColumnDecl) lineMatch(lineIndex int, line line) bool {
23+
func (c *ColumnDecl) lineMatch(lineIndex int, line *line, delim string) bool {
2324
if c.LineIndex != nil {
2425
return *c.LineIndex == lineIndex+1 // c.LineIndex is 1 based.
2526
}
2627
if c.linePatternRegexp != nil {
27-
return c.linePatternRegexp.MatchString(line.raw)
28+
return matchLine(c.linePatternRegexp, line, delim)
2829
}
2930
return true
3031
}
3132

32-
func (c *ColumnDecl) lineToColumnValue(line line) string {
33+
func (c *ColumnDecl) lineToColumnValue(line *line) string {
3334
if *c.Index < 1 || *c.Index > len(line.record) {
3435
return ""
3536
}
@@ -121,21 +122,28 @@ func (r *RecordDecl) rows() int {
121122
return *r.Rows
122123
}
123124

124-
func (r *RecordDecl) matchHeader(line []byte) bool {
125+
func (r *RecordDecl) matchHeader(line *line, delim string) bool {
125126
if r.headerRegexp == nil {
126127
panic(fmt.Sprintf("record '%s' is not header/footer based", r.fqdn))
127128
}
128-
return r.headerRegexp.Match(line)
129+
return matchLine(r.headerRegexp, line, delim)
129130
}
130131

131132
// Footer is optional. If not specified, it always matches. Thus for a header/footer record,
132133
// if the footer isn't specified, it effectively becomes a single-row record matched by header,
133134
// given that after the header matches a line, matchFooter is called on the same line.
134-
func (r *RecordDecl) matchFooter(line []byte) bool {
135+
func (r *RecordDecl) matchFooter(line *line, delim string) bool {
135136
if r.footerRegexp == nil {
136137
return true
137138
}
138-
return r.footerRegexp.Match(line)
139+
return matchLine(r.footerRegexp, line, delim)
140+
}
141+
142+
func matchLine(re *regexp.Regexp, line *line, delim string) bool {
143+
if line.raw == "" {
144+
line.raw = strings.Join(line.record, delim)
145+
}
146+
return re.MatchString(line.raw)
139147
}
140148

141149
func toFlatFileRecDecls(rs []*RecordDecl) []flatfile.RecDecl {

0 commit comments

Comments
 (0)