Commit b2d0aeb

Author: Steve van Loben Sels
Added Position field to the Tokenizer (#128)
The Position field gives a means to index into the Tokenizer's underlying byte slice. This enables use cases where the caller plans to make edits to the JSON document and wants to leverage the copy func to optimize data movement, or wants to copy the remaining bytes when exiting the tokenizing loop early.
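
A rough sketch of the early-exit use case described above, written as if it lived alongside the package's own code; copyAfterKey, its exact key matching, and the lack of string-escape handling are illustrative assumptions, not part of this change:

    // copyAfterKey tokenizes b until it reaches the object key named by key,
    // then uses Position to copy everything the Tokenizer has not consumed yet
    // with a single copy, instead of walking the rest of the document token by
    // token.
    func copyAfterKey(b []byte, key string) []byte {
        tok := NewTokenizer(b)
        for tok.Next() {
            if tok.IsKey && string(tok.Value) == `"`+key+`"` {
                // Position is the first index of the next token, so
                // b[tok.Position:] is exactly the unprocessed tail.
                rest := make([]byte, len(b)-tok.Position)
                copy(rest, b[tok.Position:])
                return rest
            }
        }
        return nil // the whole document was tokenized, or tok.Err is set
    }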
1 parent 3391c4a commit b2d0aeb

2 files changed (+38, -13 lines)

json/token.go

Lines changed: 17 additions & 1 deletion
@@ -31,7 +31,6 @@ import (
 //	...
 //	}
 //	}
-//
 type Tokenizer struct {
 	// When the tokenizer is positioned on a json delimiter this field is not
 	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
@@ -44,6 +43,17 @@ type Tokenizer struct {
 	// null, true, false, numbers, or quoted strings.
 	Value RawValue
 
+	// Position is the Tokenizer's current index into the underlying byte slice.
+	// Since the Tokenizer has already been advanced by calling Next, this
+	// position will be the first index of the next token. The position of
+	// the current Value can be calculated by subtracting len(token.value).
+	// Accordingly, slicing the underlying bytes like:
+	//
+	//	b[token.Position-len(token.Value):token.Position]
+	//
+	// will yield the current Value.
+	Position int
+
 	// When the tokenizer has encountered invalid content this field is not nil.
 	Err error
 
@@ -92,6 +102,7 @@ func (t *Tokenizer) Reset(b []byte) {
 	// However, it does not compile down to an invocation of duff-copy.
 	t.Delim = 0
 	t.Value = nil
+	t.Position = 0
 	t.Err = nil
 	t.Depth = 0
 	t.Index = 0
@@ -128,13 +139,16 @@ skipLoop:
 
 	if i > 0 {
 		t.json = t.json[i:]
+		t.Position += i
 	}
 
 	if len(t.json) == 0 {
 		t.Reset(nil)
 		return false
 	}
 
+	lenBefore := len(t.json)
+
 	var kind Kind
 	switch t.json[0] {
 	case '"':
@@ -165,6 +179,8 @@ skipLoop:
 		t.Value, t.json, t.Err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
 	}
 
+	t.Position += lenBefore - len(t.json)
+
 	t.Depth = t.depth()
 	t.Index = t.index()
 	t.flags = t.flags.withKind(kind)
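
A small illustration of the slicing relationship documented on Position (valueBytes is a hypothetical helper written as if it lived in the same package, not part of this change; the updated test below checks the same invariant for every token):

    // valueBytes re-derives the bytes of the token most recently returned by
    // Next by slicing the original input: the Value ends at Position and
    // starts len(Value) bytes earlier.
    func valueBytes(b []byte, tok *Tokenizer) []byte {
        return b[tok.Position-len(tok.Value) : tok.Position]
    }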

json/token_test.go

Lines changed: 21 additions & 12 deletions
@@ -1,6 +1,7 @@
 package json
 
 import (
+	"bytes"
 	"reflect"
 	"testing"
 )
@@ -40,22 +41,30 @@ func value(v string, depth, index int) token {
 	}
 }
 
-func tokenize(b []byte) (tokens []token) {
-	t := NewTokenizer(b)
+func tokenize(t *testing.T, b []byte) (tokens []token) {
+	tok := NewTokenizer(b)
+
+	for tok.Next() {
+		start, end := tok.Position-len(tok.Value), tok.Position
+		if end > len(b) {
+			t.Fatalf("token position too far [%d:%d], len(b) is %d", start, end, len(b))
+		}
+		if !bytes.Equal(b[start:end], tok.Value) {
+			t.Fatalf("token position is wrong [%d:%d]", start, end)
+		}
 
-	for t.Next() {
 		tokens = append(tokens, token{
-			delim: t.Delim,
-			value: t.Value,
-			err:   t.Err,
-			depth: t.Depth,
-			index: t.Index,
-			isKey: t.IsKey,
+			delim: tok.Delim,
+			value: tok.Value,
+			err:   tok.Err,
+			depth: tok.Depth,
+			index: tok.Index,
+			isKey: tok.IsKey,
 		})
 	}
 
-	if t.Err != nil {
-		panic(t.Err)
+	if tok.Err != nil {
+		t.Fatal(tok.Err)
 	}
 
 	return
@@ -174,7 +183,7 @@ func TestTokenizer(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(string(test.input), func(t *testing.T) {
-			tokens := tokenize(test.input)
+			tokens := tokenize(t, test.input)
 
 			if !reflect.DeepEqual(tokens, test.tokens) {
 				t.Error("tokens mismatch")
