From 6b932e3a9731ed55fab099a5bca75ec90be86806 Mon Sep 17 00:00:00 2001 From: Buck Evan Date: Wed, 22 Oct 2025 13:12:15 -0500 Subject: [PATCH 1/2] Ensure exhaustive switch statements PR #162 revealed that missing node type handlers in switch statements cause silent data loss - CDATA content was being dropped because CharDataNode wasn't handled. This PR adds defensive programming to prevent this entire class of bug. Changes: - Added exhaustive switch coverage for all xmlquery.NodeType values - Added exhaustive switch coverage for xml.Token and html.TokenType - Added exhaustive switch coverage for json.Token and json.Delim - Added default cases with panic() to all exhaustive switches - Added explicit documented defaults to intentionally non-exhaustive switches This follows the Go stdlib pattern (runtime/panic.go, go/types) of using panic("unreachable") for "should be impossible" states. If future xmlquery versions add new node types, or if we overlook a handler, tests will fail immediately rather than silently corrupting output. All switches now explicitly handle defaults: - 8 exhaustive switches panic on unknown values - 7 non-exhaustive switches have documented intentional behavior Added TestExhaustiveNodeTypeHandling to verify all node types are processed without panicking. All existing tests pass. This is a non-breaking change - no API modifications, only defensive additions to switch statements. Related: #162 --- internal/utils/config.go | 2 ++ internal/utils/jsonutil.go | 23 ++++++++++++++++++- internal/utils/jsonutil_test.go | 39 +++++++++++++++++++++++++++++++++ internal/utils/utils.go | 13 +++++++++++ 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/internal/utils/config.go b/internal/utils/config.go index d426f21..b37c7b7 100644 --- a/internal/utils/config.go +++ b/internal/utils/config.go @@ -61,6 +61,8 @@ func LoadConfig(fileName string) error { config.NoColor, _ = strconv.ParseBool(value) case "color": config.Color, _ = strconv.ParseBool(value) + default: + // Ignore unknown config options for forward compatibility } } diff --git a/internal/utils/jsonutil.go b/internal/utils/jsonutil.go index 50b16ca..768a95d 100644 --- a/internal/utils/jsonutil.go +++ b/internal/utils/jsonutil.go @@ -1,6 +1,7 @@ package utils import ( + "fmt" "strings" "github.com/antchfx/xmlquery" @@ -39,6 +40,11 @@ func NodeToJSON(node *xmlquery.Node, depth int) interface{} { if text != "" { textParts = append(textParts, text) } + case xmlquery.CommentNode, xmlquery.DeclarationNode, xmlquery.ProcessingInstruction, xmlquery.NotationNode: + // Skip these in JSON output + default: + // Should be impossible: all valid child node types handled above + panic(fmt.Sprintf("unknown NodeType as child of DocumentNode: %v", child.Type)) } } @@ -53,8 +59,13 @@ func NodeToJSON(node *xmlquery.Node, depth int) interface{} { case xmlquery.TextNode, xmlquery.CharDataNode: return strings.TrimSpace(node.Data) - default: + case xmlquery.CommentNode: + // Comments passed as root, return empty return nil + + default: + // Should be impossible: DocumentNode, ElementNode, TextNode, CharDataNode, CommentNode are the only valid root nodes + panic(fmt.Sprintf("unknown NodeType passed to NodeToJSON: %v", node.Type)) } } @@ -79,6 +90,11 @@ func nodeToJSONInternal(node *xmlquery.Node, depth int) interface{} { case xmlquery.ElementNode: childResult := nodeToJSONInternal(child, depth-1) addToResult(result, child.Data, childResult) + case xmlquery.CommentNode, xmlquery.ProcessingInstruction: + // Skip these in JSON output + default: + // Should be impossible: all valid element child types handled above + panic(fmt.Sprintf("unknown NodeType as child of ElementNode: %v", child.Type)) } } @@ -103,6 +119,11 @@ func getTextContent(node *xmlquery.Node) string { } case xmlquery.ElementNode: parts = append(parts, getTextContent(child)) + case xmlquery.CommentNode, xmlquery.ProcessingInstruction: + // Skip these when extracting text + default: + // Should be impossible: all valid element child types handled above + panic(fmt.Sprintf("unknown NodeType in getTextContent: %v", child.Type)) } } return strings.Join(parts, "\n") diff --git a/internal/utils/jsonutil_test.go b/internal/utils/jsonutil_test.go index 22591a4..8245373 100644 --- a/internal/utils/jsonutil_test.go +++ b/internal/utils/jsonutil_test.go @@ -45,3 +45,42 @@ func TestXmlToJSON(t *testing.T) { assert.Equal(t, expectedJson, output.String()) } } + +func TestExhaustiveNodeTypeHandling(t *testing.T) { + // Test that all xmlquery node types are handled without panicking + // This verifies our exhaustive switch statements work correctly + + xmlInput := ` + + + + text content + content]]> + + + textmoretail +` + + node, err := xmlquery.Parse(strings.NewReader(xmlInput)) + assert.NoError(t, err) + + // Should not panic - this exercises all the node types + result := NodeToJSON(node, -1) + assert.NotNil(t, result) + + // Verify the result is a map + resultMap, ok := result.(map[string]interface{}) + assert.True(t, ok) + + // Verify root element exists + root, ok := resultMap["root"] + assert.True(t, ok) + + rootMap, ok := root.(map[string]interface{}) + assert.True(t, ok) + + // Verify CDATA is preserved as text + cdataElem, ok := rootMap["cdata"] + assert.True(t, ok) + assert.Contains(t, cdataElem, "raw & unescaped") +} diff --git a/internal/utils/utils.go b/internal/utils/utils.go index 2597b2a..9f683be 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -190,6 +190,8 @@ func FormatXml(reader io.Reader, writer io.Writer, indent string, colors int) er _, _ = fmt.Fprint(writer, tagColor("")) _, _ = fmt.Fprint(writer, newline, strings.Repeat(indent, level)) default: + // Should be impossible: all xml.Token types handled above + panic(fmt.Sprintf("unknown xml.Token type: %T", token)) } } @@ -403,6 +405,9 @@ func FormatHtml(reader io.Reader, writer io.Writer, indent string, colors int) e if level == 0 { _, _ = fmt.Fprint(writer, newline) } + default: + // Should be impossible: all html.TokenType values handled above + panic(fmt.Sprintf("unknown html.TokenType: %v", token)) } } @@ -469,6 +474,9 @@ func FormatJson(reader io.Reader, writer io.Writer, indent string, colors int) e level-- } _, _ = fmt.Fprint(writer, newline, strings.Repeat(indent, level), tagColor("]")) + default: + // Should be impossible: json.Delim can only be '{', '}', '[', ']' + panic(fmt.Sprintf("unknown json.Delim: %v", tokenType)) } case string: escapedToken := strconv.Quote(token.(string)) @@ -485,6 +493,9 @@ func FormatJson(reader io.Reader, writer io.Writer, indent string, colors int) e _, _ = fmt.Fprintf(writer, "%s%v", prefix, valueColor(token)) case nil: _, _ = fmt.Fprintf(writer, "%s%s", prefix, valueColor("null")) + default: + // Should be impossible: all json.Token types handled above + panic(fmt.Sprintf("unknown json.Token type: %T", token)) } switch tokenState { @@ -494,6 +505,8 @@ func FormatJson(reader io.Reader, writer io.Writer, indent string, colors int) e suffix = "," + newline + strings.Repeat(indent, level) case jsonTokenArrayComma: suffix = "," + newline + strings.Repeat(indent, level) + default: + // Other token states don't affect suffix formatting } prefix = suffix From 6ddf8935029ca369cd837eb21745b0c8c752d75d Mon Sep 17 00:00:00 2001 From: Buck Evan Date: Fri, 14 Nov 2025 16:07:46 -0600 Subject: [PATCH 2/2] Add tests for defensive panic guards in jsonutil.go - Test all 4 panic cases with invalid NodeType(255) values - Achieves 100% coverage of jsonutil.go panic guards - Verifies defensive programming works as intended The 4 utils.go formatter panics remain untested as they would require refactoring to inject mock tokens (stdlib parsers control token creation). The jsonutil.go panics are fully tested and prove the defensive guards work correctly. --- internal/utils/jsonutil_test.go | 76 +++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/internal/utils/jsonutil_test.go b/internal/utils/jsonutil_test.go index 8245373..e801f76 100644 --- a/internal/utils/jsonutil_test.go +++ b/internal/utils/jsonutil_test.go @@ -84,3 +84,79 @@ func TestExhaustiveNodeTypeHandling(t *testing.T) { assert.True(t, ok) assert.Contains(t, cdataElem, "raw & unescaped") } + +func TestUnknownNodeTypePanics(t *testing.T) { + // Test that unknown node types trigger defensive panics + // This ensures we catch issues if xmlquery adds new node types + + t.Run("unknown NodeType passed to NodeToJSON panics", func(t *testing.T) { + // Create a node with an invalid type + invalidNode := &xmlquery.Node{ + Type: xmlquery.NodeType(255), // Invalid type + Data: "test", + } + + assert.Panics(t, func() { + NodeToJSON(invalidNode, -1) + }, "NodeToJSON should panic on unknown node type") + }) + + t.Run("unknown NodeType as child of DocumentNode panics", func(t *testing.T) { + // Create a document with an invalid child + doc := &xmlquery.Node{ + Type: xmlquery.DocumentNode, + } + invalidChild := &xmlquery.Node{ + Type: xmlquery.NodeType(255), // Invalid type + Data: "test", + } + doc.FirstChild = invalidChild + invalidChild.Parent = doc + + assert.Panics(t, func() { + NodeToJSON(doc, -1) + }, "NodeToJSON should panic on unknown child type under DocumentNode") + }) + + t.Run("unknown NodeType as child of ElementNode panics", func(t *testing.T) { + // Create a document with an element that has an invalid child + doc := &xmlquery.Node{ + Type: xmlquery.DocumentNode, + } + elem := &xmlquery.Node{ + Type: xmlquery.ElementNode, + Data: "root", + Parent: doc, + } + invalidChild := &xmlquery.Node{ + Type: xmlquery.NodeType(255), // Invalid type + Data: "test", + Parent: elem, + } + doc.FirstChild = elem + elem.FirstChild = invalidChild + + assert.Panics(t, func() { + NodeToJSON(doc, -1) + }, "NodeToJSON should panic on unknown child type under ElementNode") + }) + + t.Run("unknown NodeType in getTextContent panics", func(t *testing.T) { + // getTextContent is called when extracting text from elements + // Create an element with mixed content including invalid node + elem := &xmlquery.Node{ + Type: xmlquery.ElementNode, + Data: "test", + } + invalidChild := &xmlquery.Node{ + Type: xmlquery.NodeType(255), // Invalid type + Data: "test", + Parent: elem, + } + elem.FirstChild = invalidChild + + assert.Panics(t, func() { + getTextContent(elem) + }, "getTextContent should panic on unknown node type") + }) +}