Skip to content

Commit 44e1d28

Browse files
bukzorclaude
andcommitted
Fix #160: Anchor IsJSON regex to prevent false positives on CDATA
The IsJSON() function was using an unanchored regex `\s*[{\[]` which would match a `[` character anywhere in the input. This caused XML documents containing CDATA sections like `<![CDATA[text]]>` to be incorrectly detected as JSON. Changed the regex to `^\s*[{\[]` to anchor it to the start of the input string. Now only inputs that actually begin with `{` or `[` (after optional whitespace) will be detected as JSON. Added TestFormatDetection to verify CDATA sections are correctly detected as XML format, not JSON format. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 727399c commit 44e1d28

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

cmd/root_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"strings"
99
"testing"
1010

11+
"github.com/antchfx/xmlquery"
1112
"github.com/sibprogrammer/xq/internal/utils"
1213
"github.com/spf13/cobra"
1314
"github.com/spf13/pflag"
@@ -199,3 +200,38 @@ Bye.`,
199200
})
200201
}
201202
}
203+
204+
func TestFormatDetection(t *testing.T) {
205+
t.Run("CDATA should be detected as XML not JSON", func(t *testing.T) {
206+
input := "<root><![CDATA[1 & 2]]></root>"
207+
reader := strings.NewReader(input)
208+
209+
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
210+
flags.Bool("html", false, "")
211+
212+
contentType, _ := detectFormat(flags, reader)
213+
214+
// Should be XML (0), not JSON (2)
215+
assert.Equal(t, utils.ContentXml, contentType, "CDATA should be detected as XML")
216+
})
217+
218+
t.Run("Debug CDATA node structure", func(t *testing.T) {
219+
input := "<root><![CDATA[1 & 2]]></root>"
220+
doc, err := xmlquery.Parse(strings.NewReader(input))
221+
assert.Nil(t, err)
222+
223+
// Walk the tree and log all node types
224+
var walk func(*xmlquery.Node, int)
225+
walk = func(n *xmlquery.Node, depth int) {
226+
if n == nil {
227+
return
228+
}
229+
indent := strings.Repeat(" ", depth)
230+
t.Logf("%sType: %d, Data: %q", indent, n.Type, n.Data)
231+
for child := n.FirstChild; child != nil; child = child.NextSibling {
232+
walk(child, depth+1)
233+
}
234+
}
235+
walk(doc, 0)
236+
})
237+
}

internal/utils/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ func IsHTML(input string) bool {
519519

520520
// jsonStartPattern matches input whose first character after optional leading
// whitespace opens a JSON object ('{') or array ('['). The '^' anchor is
// essential: without it a '[' anywhere in the input (for example inside an XML
// "<![CDATA[...]]>" section) would match. The pattern is compiled once at
// package initialisation rather than on every IsJSON call.
var jsonStartPattern = regexp.MustCompile(`^\s*[{\[]`)

// IsJSON reports whether input looks like a JSON document, i.e. whether it
// begins with '{' or '[' after optional leading whitespace (the characters
// matched by the regexp class \s). It is a cheap prefix heuristic only and
// does not validate that the input is well-formed JSON.
func IsJSON(input string) bool {
	// No case folding is needed: the pattern contains no letters, so
	// lowercasing the input first could never change the result.
	return jsonStartPattern.MatchString(input)
}
525525

0 commit comments

Comments
 (0)