11package jsonparser
22
33import (
4+ "bytes"
5+ "fmt"
46 "testing"
57)
68
@@ -15,37 +17,199 @@ func TestH2I(t *testing.T) {
1517 }
1618}
1719
// escapedUnicodeRuneTest is a single table-driven case shared by the unicode
// escape decoder tests.
type escapedUnicodeRuneTest struct {
	// in is the escape sequence text handed to the decoder.
	in string
	// isErr is true when the decoder is expected to report failure.
	isErr bool
	// out is the expected rune; it is only compared when isErr is false.
	out rune
	// len is the expected length reported by decodeUnicodeEscape; the
	// single-escape test does not consult it.
	len int
}
26+
27+ var commonUnicodeEscapeTests = []escapedUnicodeRuneTest {
28+ {in : `\u0041` , out : 'A' , len : 6 },
29+ {in : `\u0000` , out : 0 , len : 6 },
30+ {in : `\u00b0` , out : '°' , len : 6 },
31+ {in : `\u00B0` , out : '°' , len : 6 },
32+
33+ {in : `\x1234` , out : 0x1234 , len : 6 }, // These functions do not check the \u prefix
34+
35+ {in : `` , isErr : true },
36+ {in : `\` , isErr : true },
37+ {in : `\u` , isErr : true },
38+ {in : `\u1` , isErr : true },
39+ {in : `\u11` , isErr : true },
40+ {in : `\u111` , isErr : true },
41+ {in : `\u123X` , isErr : true },
42+ }
43+
44+ var singleUnicodeEscapeTests = append ([]escapedUnicodeRuneTest {
45+ {in : `\uD83D` , out : 0xD83D , len : 6 },
46+ {in : `\uDE03` , out : 0xDE03 , len : 6 },
47+ {in : `\uFFFF` , out : 0xFFFF , len : 6 },
48+ }, commonUnicodeEscapeTests ... )
49+
50+ var multiUnicodeEscapeTests = append ([]escapedUnicodeRuneTest {
51+ {in : `\uD83D` , isErr : true },
52+ {in : `\uDE03` , isErr : true },
53+ {in : `\uFFFF` , isErr : true },
54+
55+ {in : `\uD83D\uDE03` , out : '\U0001F603' , len : 12 },
56+ {in : `\uD800\uDC00` , out : '\U00010000' , len : 12 },
57+
58+ {in : `\uD800\` , isErr : true },
59+ {in : `\uD800\u` , isErr : true },
60+ {in : `\uD800\uD` , isErr : true },
61+ {in : `\uD800\uDC` , isErr : true },
62+ {in : `\uD800\uDC0` , isErr : true },
63+ }, commonUnicodeEscapeTests ... )
64+
1865func TestDecodeSingleUnicodeEscape (t * testing.T ) {
19- escapeSequences := []string {
20- `\"` ,
21- `\\` ,
22- `\n` ,
23- `\t` ,
24- `\r` ,
25- `\/` ,
26- `\b` ,
27- `\f` ,
66+ for _ , test := range singleUnicodeEscapeTests {
67+ r , ok := decodeSingleUnicodeEscape ([]byte (test .in ))
68+ isErr := ! ok
69+
70+ if isErr != test .isErr {
71+ t .Errorf ("decodeSingleUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t" , test .in , test .isErr , isErr )
72+ } else if isErr {
73+ continue
74+ } else if r != test .out {
75+ t .Errorf ("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)" , test .in , test .out , test .out , r , r )
76+ }
2877 }
78+ }
79+
80+ func TestDecodeUnicodeEscape (t * testing.T ) {
81+ for _ , test := range multiUnicodeEscapeTests {
82+ r , len := decodeUnicodeEscape ([]byte (test .in ))
83+ isErr := (len == - 1 )
2984
30- runeValues := []struct {
31- r rune
32- ok bool
33- }{
34- {'"' , true },
35- {'\\' , true },
36- {'\n' , true },
37- {'\t' , true },
38- {'/' , true },
39- {'\b' , true },
40- {'\f' , true },
85+ if isErr != test .isErr {
86+ t .Errorf ("decodeUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t" , test .in , test .isErr , isErr )
87+ } else if isErr {
88+ continue
89+ } else if len != test .len {
90+ t .Errorf ("decodeUnicodeEscape(%s) returned length mismatch: expected %d, obtained %d" , test .in , test .len , len )
91+ } else if r != test .out {
92+ t .Errorf ("decodeUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)" , test .in , test .out , test .out , r , r )
93+ }
4194 }
95+ }
96+
// unescapeTest is a single table-driven case for TestUnescape.
type unescapeTest struct {
	// in is the raw, still-escaped input.
	in string
	// out is the expected unescaped result; it is only compared when isErr is false.
	out string
	// canAlloc is true when unescape is expected to return newly allocated
	// memory if the caller-supplied buffer is too small.
	canAlloc bool
	// isErr is true when unescape is expected to return an error.
	isErr bool
}

// unescapeTests is the table consumed by TestUnescape.
var unescapeTests = []unescapeTest{
	// Inputs without escapes: no allocation is expected.
	{in: ``, out: ``, canAlloc: false},
	{in: `a`, out: `a`, canAlloc: false},
	{in: `abcde`, out: `abcde`, canAlloc: false},

	// Inputs with valid escapes. The expected output contains exactly the
	// literal characters of the input, with each escape replaced by its rune.
	{in: `ab\\de`, out: `ab\de`, canAlloc: true},
	{in: `ab\"de`, out: `ab"de`, canAlloc: true},
	{in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true},
	{in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true},
	{in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true},
	{in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true},
	{in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true},

	// Malformed or unsupported escapes: an error is expected.
	{in: `\uD800`, isErr: true},
	{in: `\uFFFF`, isErr: true},
	{in: `abcde\`, isErr: true},
	{in: `abcde\x`, isErr: true},
	{in: `abcde\u`, isErr: true},
	{in: `abcde\u1`, isErr: true},
	{in: `abcde\u12`, isErr: true},
	{in: `abcde\u123`, isErr: true},
	{in: `abcde\uD800`, isErr: true},
	{in: `ab\uD800de`, isErr: true},
	{in: `\uD800abcde`, isErr: true},
}
129+
130+ // isSameMemory checks if two slices contain the same memory pointer (meaning one is a
131+ // subslice of the other, with possibly differing lengths/capacities).
132+ func isSameMemory (a , b []byte ) bool {
133+ if cap (a ) == 0 || cap (b ) == 0 {
134+ return cap (a ) == cap (b )
135+ } else if a , b = a [:1 ], b [:1 ]; a [0 ] != b [0 ] {
136+ return false
137+ } else {
138+ a [0 ]++
139+ same := (a [0 ] == b [0 ])
140+ a [0 ]--
141+ return same
142+ }
143+
144+ }
145+
146+ func TestUnescape (t * testing.T ) {
147+
148+ for _ , test := range unescapeTests {
149+ type bufferTestCase struct {
150+ buf []byte
151+ isTooSmall bool
152+ }
153+
154+ var bufs []bufferTestCase
155+
156+ if len (test .in ) == 0 {
157+ // If the input string is length 0, only a buffer of size 0 is a meaningful test
158+ bufs = []bufferTestCase {{nil , false }}
159+ } else {
160+ // For non-empty input strings, we can try several buffer sizes (0, len-1, len)
161+ bufs = []bufferTestCase {
162+ {nil , true },
163+ {make ([]byte , 0 , len (test .in )- 1 ), true },
164+ {make ([]byte , 0 , len (test .in )), false },
165+ }
166+ }
167+
168+ for _ , buftest := range bufs {
169+ in := []byte (test .in )
170+ buf := buftest .buf
171+
172+ out , err := unescape (in , buf )
173+ isErr := (err != nil )
174+ isAlloc := ! isSameMemory (out , in ) && ! isSameMemory (out , buf )
175+
176+ if isErr != test .isErr {
177+ t .Errorf ("unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t" , test .in , cap (buf ), test .isErr , isErr )
178+ break
179+ } else if isErr {
180+ continue
181+ } else if ! bytes .Equal (out , []byte (test .out )) {
182+ t .Errorf ("unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)" , test .in , cap (buf ), test .out , []byte (test .out ), len (test .out ), string (out ), out , len (out ))
183+ break
184+ } else if isAlloc != (test .canAlloc && buftest .isTooSmall ) {
185+ t .Errorf ("unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t" , test .in , cap (buf ), buftest .isTooSmall , isAlloc )
186+ break
187+ }
49188 }
50189 }
51190}
191+
192+ //
193+ //escapeSequences := []string{
194+ //`\"`,
195+ //`\\`,
196+ //`\n`,
197+ //`\t`,
198+ //`\r`,
199+ //`\/`,
200+ //`\b`,
201+ //`\f`,
202+ //}
203+ //
204+ //runeValues := []struct {
205+ //r rune
206+ //ok bool
207+ //}{
208+ //{'"', true},
209+ //{'\\', true},
210+ //{'\n', true},
211+ //{'\t', true},
212+ //{'/', true},
213+ //{'\b', true},
214+ //{'\f', true},
215+ //}
0 commit comments