Skip to content

Commit 8c77662

Browse files
authored
Adding column index and line matching attributes (line_index and line_pattern) to csv2 column decl and json validation schema (#175)
* Adding column index and line matching attributes (line_index and line_pattern) to csv2 column decl and json validation schema. It turns out that if we want to support multi-line records in csv (whether they are based on a fixed number of rows or on header/footer), we have to let each column specify which line its value should come from and, within that particular line, which indexed value to use; hence the addition of `index`, `line_index` and `line_pattern`.
1 parent f003f99 commit 8c77662

File tree

8 files changed

+149
-10
lines changed

8 files changed

+149
-10
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
([]*csv.ColumnDecl) (len=3) {
2+
(*csv.ColumnDecl)({
3+
Name: (string) (len=2) "c1",
4+
Index: (*int)(1),
5+
LineIndex: (*int)(1),
6+
LinePattern: (*string)(<nil>),
7+
linePatternRegexp: (*regexp.Regexp)(<nil>)
8+
}),
9+
(*csv.ColumnDecl)({
10+
Name: (string) (len=2) "c2",
11+
Index: (*int)(3),
12+
LineIndex: (*int)(<nil>),
13+
LinePattern: (*string)(<nil>),
14+
linePatternRegexp: (*regexp.Regexp)(<nil>)
15+
}),
16+
(*csv.ColumnDecl)({
17+
Name: (string) (len=2) "c3",
18+
Index: (*int)(4),
19+
LineIndex: (*int)(<nil>),
20+
LinePattern: (*string)((len=3) "^C$"),
21+
linePatternRegexp: (*regexp.Regexp)(^C$)
22+
})
23+
}

extensions/omniv21/fileformat/flatfile/csv/decl.go

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,30 @@ import (
1111

1212
// ColumnDecl describes a column of a csv record.
1313
type ColumnDecl struct {
14-
Name string `json:"name,omitempty"`
14+
Name string `json:"name,omitempty"`
15+
Index *int `json:"index,omitempty"` // 1-based. optional. When omitted, validation assigns the previous column's index + 1 (1 for the first column).
16+
LineIndex *int `json:"line_index,omitempty"` // 1-based. optional. Cannot be specified together with LinePattern.
17+
LinePattern *string `json:"line_pattern,omitempty"` // optional. Regexp matched against the raw line. Cannot be specified together with LineIndex.
18+
19+
linePatternRegexp *regexp.Regexp // compiled form of LinePattern; populated by validateColumnDecl.
1520
}
1621

17-
// Design note: given currently ColumnDecl contains only Name field, we could've simply
18-
// change RecordDecl.Columns into a []string. But in the future, if we ever need to add
19-
// anything to a column decl, we'd have to introduce a breaking schema change.
22+
// lineMatch reports whether the given line is one this column takes its value from.
//
// The checks are applied in order, and the first one that applies decides:
//  1. If LineIndex is set, the line matches only when LineIndex equals lineIndex+1, i.e. the
//     lineIndex argument is treated as 0-based while LineIndex is 1-based.
//  2. Otherwise, if a compiled line pattern is present, the line matches when the pattern
//     matches line.raw.
//  3. Otherwise the column has no line constraint and every line matches.
func (c *ColumnDecl) lineMatch(lineIndex int, line line) bool {
23+
if c.LineIndex != nil {
24+
return *c.LineIndex == lineIndex+1 // c.LineIndex is 1 based.
25+
}
26+
if c.linePatternRegexp != nil {
27+
return c.linePatternRegexp.MatchString(line.raw)
28+
}
29+
return true
30+
}
31+
32+
func (c *ColumnDecl) lineToColumnValue(line line) string {
33+
if *c.Index < 1 || *c.Index > len(line.record) {
34+
return ""
35+
}
36+
return line.record[*c.Index-1]
37+
}
2038

2139
const (
2240
typeRecord = "record"

extensions/omniv21/fileformat/flatfile/csv/decl_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,25 @@ import (
1111
"github.com/stretchr/testify/assert"
1212
)
1313

14+
// TestColumnDecl_LineMatch covers the three lineMatch cases: no line constraint, a LineIndex
// constraint, and a compiled line pattern constraint.
func TestColumnDecl_LineMatch(t *testing.T) {
15+
assert.True(t, (&ColumnDecl{}).lineMatch(0, line{})) // no constraint: any line matches
16+
assert.False(t, (&ColumnDecl{LineIndex: testlib.IntPtr(2)}).lineMatch(0, line{})) // 1-based 2 != 0-based 0
17+
assert.True(t, (&ColumnDecl{LineIndex: testlib.IntPtr(2)}).lineMatch(1, line{})) // 1-based 2 == 0-based 1
18+
assert.False(t, (&ColumnDecl{linePatternRegexp: regexp.MustCompile("^ABC.*$")}).
19+
lineMatch(0, line{raw: "1234567"})) // raw line doesn't match the pattern
20+
assert.True(t, (&ColumnDecl{linePatternRegexp: regexp.MustCompile("^ABC.*$")}).
21+
lineMatch(0, line{raw: "ABCDEFG"})) // raw line matches the pattern
22+
}
23+
24+
// TestColumnDecl_LineToColumnValue verifies that an out-of-range Index yields an empty string
// and that an in-range Index selects the value at that 1-based position of line.record.
func TestColumnDecl_LineToColumnValue(t *testing.T) {
25+
assert.Equal(t, "", (&ColumnDecl{Index: testlib.IntPtr(2)}).lineToColumnValue(
26+
line{record: []string{"1"}})) // index out of range (beyond the end of the record)
27+
assert.Equal(t, "", (&ColumnDecl{Index: testlib.IntPtr(0)}).lineToColumnValue(
28+
line{record: []string{"1"}})) // index out of range (below 1)
29+
assert.Equal(t, "9", (&ColumnDecl{Index: testlib.IntPtr(5)}).lineToColumnValue(
30+
line{record: []string{"1", "3", "5", "7", "9", "11"}})) // in range
31+
}
32+
1433
func TestRecordDecl(t *testing.T) {
1534
// DeclName()
1635
r := &RecordDecl{Name: "r1"}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package csv
2+
3+
// line carries the data of one input line as consumed by ColumnDecl's line matching and
// column value extraction.
type line struct {
4+
lineNum int // 1-based
5+
record []string // values of the line; ColumnDecl.Index (1-based) selects one of them. NOTE(review): presumably the parsed csv fields - confirm against the reader.
6+
raw string // text that a column's line pattern regexp is matched against. NOTE(review): presumably the unparsed line - confirm against the reader.
7+
}

extensions/omniv21/fileformat/flatfile/csv/validate.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ func (ctx *validateCtx) validateRecordDecl(fqdn string, decl *RecordDecl) (err e
6262
return fmt.Errorf("record/record_group '%s' has 'min' value %d > 'max' value %d",
6363
fqdn, decl.MinOccurs(), decl.MaxOccurs())
6464
}
65+
for i, col := range decl.Columns {
66+
prevCol := (*ColumnDecl)(nil)
67+
if i > 0 {
68+
prevCol = decl.Columns[i-1]
69+
}
70+
if err = ctx.validateColumnDecl(fqdn, prevCol, col); err != nil {
71+
return err
72+
}
73+
}
6574
for _, c := range decl.Children {
6675
if err = ctx.validateRecordDecl(strs.BuildFQDN2("/", fqdn, c.Name), c); err != nil {
6776
return err
@@ -70,3 +79,33 @@ func (ctx *validateCtx) validateRecordDecl(fqdn string, decl *RecordDecl) (err e
7079
decl.childRecDecls = toFlatFileRecDecls(decl.Children)
7180
return nil
7281
}
82+
83+
// intPtr returns a pointer to an int holding the value v.
func intPtr(v int) *int {
84+
return &v
85+
}
86+
87+
func (ctx *validateCtx) validateColumnDecl(fqdn string, prevDecl, decl *ColumnDecl) (err error) {
88+
// If column.index not specified, then we'll use the previous column's index value + 1 unless
89+
// the column is the first column, then 1 will be used.
90+
// if column.index is explicitly specified, it will be honored.
91+
if decl.Index == nil {
92+
if prevDecl == nil {
93+
decl.Index = intPtr(1)
94+
} else {
95+
decl.Index = intPtr(*prevDecl.Index + 1)
96+
}
97+
}
98+
if decl.LineIndex != nil && decl.LinePattern != nil {
99+
return fmt.Errorf(
100+
"record '%s' column '%s' cannot have both `line_index` and `line_pattern` specified at the same time",
101+
fqdn, decl.Name)
102+
}
103+
if decl.LinePattern != nil {
104+
if decl.linePatternRegexp, err = caches.GetRegex(*decl.LinePattern); err != nil {
105+
return fmt.Errorf(
106+
"record '%s' column '%s' has an invalid 'line_pattern' regexp '%s': %s",
107+
fqdn, decl.Name, *decl.LinePattern, err.Error())
108+
}
109+
}
110+
return nil
111+
}

extensions/omniv21/fileformat/flatfile/csv/validate_test.go

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package csv
33
import (
44
"testing"
55

6+
"github.com/bradleyjkemp/cupaloy"
67
"github.com/jf-tech/go-corelib/strs"
78
"github.com/jf-tech/go-corelib/testlib"
89
"github.com/stretchr/testify/assert"
@@ -99,10 +100,36 @@ func TestValidateFileDecl_MinGreaterThanMax(t *testing.T) {
99100
assert.Equal(t, `record/record_group 'A/B' has 'min' value 2 > 'max' value 1`, err.Error())
100101
}
101102

103+
// TestValidateFileDecl_ColumnLineIndexAndLinePatternSameTime verifies that validation rejects a
// column that specifies both LineIndex and LinePattern, and pins the exact error message.
func TestValidateFileDecl_ColumnLineIndexAndLinePatternSameTime(t *testing.T) {
104+
err := (&validateCtx{}).validateFileDecl(&FileDecl{
105+
Records: []*RecordDecl{
106+
{Name: "A", Columns: []*ColumnDecl{
107+
{Name: "c", LineIndex: testlib.IntPtr(2), LinePattern: strs.StrPtr(".")}}},
108+
},
109+
})
110+
assert.Error(t, err)
111+
assert.Equal(t,
112+
"record 'A' column 'c' cannot have both `line_index` and `line_pattern` specified at the same time",
113+
err.Error())
114+
}
115+
116+
// TestValidateFileDecl_InvalidColumnLinePattern verifies that validation rejects a column whose
// LinePattern is not a valid regexp, and pins the exact error message including the regexp
// parsing error text.
func TestValidateFileDecl_InvalidColumnLinePattern(t *testing.T) {
117+
err := (&validateCtx{}).validateFileDecl(&FileDecl{
118+
Records: []*RecordDecl{
119+
{Name: "A", Columns: []*ColumnDecl{
120+
{Name: "c", LinePattern: strs.StrPtr("[invalid")}}},
121+
},
122+
})
123+
assert.Error(t, err)
124+
assert.Equal(t,
125+
"record 'A' column 'c' has an invalid 'line_pattern' regexp '[invalid': error parsing regexp: missing closing ]: `[invalid`",
126+
err.Error())
127+
}
128+
102129
func TestValidateFileDecl_Success(t *testing.T) {
103-
col1 := &ColumnDecl{Name: "c1"}
104-
col2 := &ColumnDecl{Name: "c2"}
105-
col3 := &ColumnDecl{Name: "c3"}
130+
col1 := &ColumnDecl{Name: "c1", LineIndex: testlib.IntPtr(1)}
131+
col2 := &ColumnDecl{Name: "c2", Index: testlib.IntPtr(3)}
132+
col3 := &ColumnDecl{Name: "c3", LinePattern: strs.StrPtr("^C$")}
106133
fd := &FileDecl{
107134
Records: []*RecordDecl{
108135
{
@@ -126,5 +153,5 @@ func TestValidateFileDecl_Success(t *testing.T) {
126153
assert.Equal(t, 1, len(fd.Records[0].childRecDecls))
127154
assert.Same(t, fd.Records[0].Children[0], fd.Records[0].childRecDecls[0].(*RecordDecl))
128155
assert.Equal(t, "A/B", fd.Records[0].Children[0].fqdn)
129-
assert.Equal(t, []*ColumnDecl{col1, col2, col3}, fd.Records[0].Children[0].Columns)
156+
cupaloy.SnapshotT(t, fd.Records[0].Children[0].Columns)
130157
}

extensions/omniv21/validation/csv2FileDeclaration.go

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

extensions/omniv21/validation/csv2FileDeclaration.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@
7777
"items": {
7878
"type": "object",
7979
"properties": {
80-
"name": { "type": "string", "minLength": 1 }
80+
"name": { "type": "string", "minLength": 1 },
81+
"index": { "type": "integer", "minimum": 1 },
82+
"line_index": { "type": "integer", "minimum": 1 },
83+
"line_pattern": { "type": "string", "minLength": 1 }
8184
},
8285
"required": [ "name" ],
8386
"additionalProperties": false

0 commit comments

Comments
 (0)