Skip to content

Commit f28c4cf

Browse files
committed
Detect markers separated by blank lines from type declarations
This fixes issue #53 by detecting kubebuilder markers that are separated from type declarations by blank lines. Both single-line and multi-line marker groups are now properly detected. - Modified marker analyzer to check the previous comment group - Added extractOrphanedMarkers and helper functions - Added constants for marker prefix and separation limits - Added logic to skip commented-out code blocks - Renamed containsMultilineMarkers to containsMarkers for clarity - Added comprehensive test cases covering: - Multi-line markers with blank lines (BlankLineTest) - Single-line markers with blank lines (SingleLineMarkerTest) - Distance limits preventing false positives (TooFarAwayTest) - Commented-out code rejection (CommentedCodeTest) - List types with blank line markers (ServiceList) Signed-off-by: Sascha Grunert <sgrunert@redhat.com>
1 parent ab50bf5 commit f28c4cf

File tree

3 files changed

+503
-6
lines changed

3 files changed

+503
-6
lines changed

pkg/analysis/helpers/markers/analyzer.go

Lines changed: 288 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,21 @@ import (
4545
// is "k8s:item(one: "value", two: "value")=...".
4646
const UnnamedArgument = ""
4747

48+
// maxMarkerSeparationLines is the maximum number of lines that can separate
49+
// a marker comment group from the godoc comment for it to still be considered
50+
// associated with the type declaration.
51+
const maxMarkerSeparationLines = 3
52+
53+
// markerPrefix is the prefix that identifies a comment line as a marker.
54+
const markerPrefix = "// +"
55+
4856
// Markers allows access to markers extracted from the
4957
// go types.
5058
type Markers interface {
5159
// FieldMarkers returns markers associated to the field.
5260
FieldMarkers(*ast.Field) MarkerSet
5361

54-
// StructMarkers returns markers associated to the given sturct.
62+
// StructMarkers returns markers associated to the given struct.
5563
StructMarkers(*ast.StructType) MarkerSet
5664

5765
// TypeMarkers returns markers associated to the given type.
@@ -154,7 +162,19 @@ func run(pass *analysis.Pass) (any, error) {
154162
inspect.Preorder(nodeFilter, func(n ast.Node) {
155163
switch typ := n.(type) {
156164
case *ast.GenDecl:
157-
extractGenDeclMarkers(typ, results)
165+
// Find the file this declaration belongs to.
166+
// For most packages, there are only a few files (typically 1-10),
167+
// so a simple linear search is efficient and clear.
168+
var file *ast.File
169+
170+
for _, f := range pass.Files {
171+
if f.Pos() <= typ.Pos() && typ.End() <= f.End() {
172+
file = f
173+
break
174+
}
175+
}
176+
177+
extractGenDeclMarkers(typ, file, pass.Fset, results)
158178
case *ast.Field:
159179
extractFieldMarkers(typ, results)
160180
}
@@ -163,15 +183,20 @@ func run(pass *analysis.Pass) (any, error) {
163183
return results, nil
164184
}
165185

166-
func extractGenDeclMarkers(typ *ast.GenDecl, results *markers) {
186+
func extractGenDeclMarkers(typ *ast.GenDecl, file *ast.File, fset *token.FileSet, results *markers) {
167187
declMarkers := NewMarkerSet()
168188

189+
// Collect markers from the GenDecl's Doc field (comments directly attached to the declaration)
169190
if typ.Doc != nil {
170191
for _, comment := range typ.Doc.List {
171192
if marker := extractMarker(comment); marker.Identifier != "" {
172193
declMarkers.Insert(marker)
173194
}
174195
}
196+
197+
// Also collect markers from the comment group immediately before the godoc comment
198+
// if separated by a blank line.
199+
extractOrphanedMarkers(typ.Doc, file, fset, declMarkers)
175200
}
176201

177202
if len(typ.Specs) == 0 {
@@ -190,6 +215,255 @@ func extractGenDeclMarkers(typ *ast.GenDecl, results *markers) {
190215
}
191216
}
192217

218+
// extractOrphanedMarkers finds markers in the comment group immediately before the godoc comment
219+
// that are separated by a blank line. Only the immediately preceding comment group is checked,
220+
// and it must be within maxMarkerSeparationLines lines of the godoc comment.
221+
//
222+
// This handles the "second level comment bug" (issue #53) where markers are separated from type
223+
// declarations by blank lines, which commonly occurs in real-world Kubernetes API code.
224+
//
225+
// Example scenario this handles:
226+
//
227+
// // +kubebuilder:object:root=true
228+
// // +kubebuilder:subresource:status
229+
//
230+
// // ClusterList contains a list of Cluster.
231+
// type ClusterList struct {
232+
// metav1.TypeMeta `json:",inline"`
233+
// metav1.ListMeta `json:"metadata,omitempty"`
234+
// Items []Cluster `json:"items"`
235+
// }
236+
//
237+
// The markers will be detected even though separated by a blank line from the godoc comment.
238+
// Note: Only multi-line marker groups are considered orphaned. Single-line markers are assumed
239+
// to be regular Doc comments already handled by the AST parser.
240+
func extractOrphanedMarkers(docGroup *ast.CommentGroup, file *ast.File, fset *token.FileSet, declMarkers MarkerSet) {
241+
if file == nil || fset == nil {
242+
return
243+
}
244+
245+
prevGroup := findPreviousCommentGroup(docGroup, file)
246+
if prevGroup == nil {
247+
return
248+
}
249+
250+
if !isValidOrphanedMarkerGroup(prevGroup, docGroup, file, fset) {
251+
return
252+
}
253+
254+
// Extract markers from the previous comment group
255+
for _, comment := range prevGroup.List {
256+
if marker := extractMarker(comment); marker.Identifier != "" {
257+
declMarkers.Insert(marker)
258+
}
259+
}
260+
}
261+
262+
// findPreviousCommentGroup finds the comment group immediately before the given docGroup.
263+
func findPreviousCommentGroup(docGroup *ast.CommentGroup, file *ast.File) *ast.CommentGroup {
264+
for i, cg := range file.Comments {
265+
if cg == docGroup && i > 0 {
266+
return file.Comments[i-1]
267+
}
268+
}
269+
270+
return nil
271+
}
272+
273+
// isValidOrphanedMarkerGroup checks if the previous comment group is a valid orphaned marker group.
274+
func isValidOrphanedMarkerGroup(prevGroup, docGroup *ast.CommentGroup, file *ast.File, fset *token.FileSet) bool {
275+
// Check if the comment groups are properly separated
276+
if !isProperlySeparated(prevGroup, docGroup, fset) {
277+
return false
278+
}
279+
280+
// Only extract if the comment group contains markers
281+
if !containsMarkers(prevGroup) {
282+
return false
283+
}
284+
285+
// Check if this previous comment group is a Doc comment for another declaration
286+
return !isDocCommentForDeclaration(prevGroup, file)
287+
}
288+
289+
// isProperlySeparated checks if comment groups are separated by at least one blank line.
290+
func isProperlySeparated(prevGroup, docGroup *ast.CommentGroup, fset *token.FileSet) bool {
291+
docStartLine := fset.Position(docGroup.Pos()).Line
292+
prevEndLine := fset.Position(prevGroup.End()).Line
293+
lineDiff := docStartLine - prevEndLine
294+
295+
// lineDiff > 1: ensures at least one blank line
296+
// lineDiff <= maxMarkerSeparationLines: ensures not too far apart
297+
return lineDiff > 1 && lineDiff <= maxMarkerSeparationLines
298+
}
299+
300+
// containsMarkers checks if a comment group contains at least one marker.
301+
// It also ensures the comment group doesn't contain commented-out code.
302+
//
303+
// This function detects both single-line and multi-line marker groups that are
304+
// separated from type declarations by blank lines (orphaned markers).
305+
//
306+
// Single-line comments immediately before a type declaration (without a blank line)
307+
// are already captured as Doc comments by the Go AST parser and processed normally.
308+
//
309+
// Example of what IS detected (orphaned markers separated by blank line):
310+
//
311+
// // +kubebuilder:object:root=true
312+
//
313+
// // MyType does something
314+
// type MyType struct {}
315+
//
316+
// Or multi-line:
317+
//
318+
// // +kubebuilder:object:root=true
319+
// // +kubebuilder:subresource:status
320+
//
321+
// // MyType does something
322+
// type MyType struct {}
323+
//
324+
// Example of what is NOT detected (marker without blank line, already handled as Doc comment):
325+
//
326+
// // +kubebuilder:object:root=true
327+
// // MyType does something
328+
// type MyType struct {}
329+
func containsMarkers(group *ast.CommentGroup) bool {
330+
if len(group.List) == 0 {
331+
return false
332+
}
333+
334+
hasMarker := false
335+
336+
for _, comment := range group.List {
337+
text := comment.Text
338+
if strings.HasPrefix(text, markerPrefix) {
339+
hasMarker = true
340+
} else if looksLikeCommentedCode(text) {
341+
// Skip comment groups that contain commented-out code
342+
return false
343+
}
344+
}
345+
346+
return hasMarker
347+
}
348+
349+
// looksLikeCommentedCode checks if a comment line looks like commented-out code.
350+
func looksLikeCommentedCode(text string) bool {
351+
content := prepareContentForAnalysis(text)
352+
353+
// Empty lines or lines starting with markers are not code
354+
if content == "" || strings.HasPrefix(content, "+") {
355+
return false
356+
}
357+
358+
return hasCodeIndicators(content)
359+
}
360+
361+
// prepareContentForAnalysis strips comment prefixes and normalizes the content.
362+
func prepareContentForAnalysis(text string) string {
363+
content := strings.TrimPrefix(text, "//")
364+
return strings.TrimSpace(content)
365+
}
366+
367+
// hasCodeIndicators checks if content contains patterns that indicate Go code.
368+
func hasCodeIndicators(content string) bool {
369+
// Check for struct tags (backticks are a strong signal of Go code)
370+
if strings.Contains(content, "`") {
371+
return true
372+
}
373+
374+
// Check for field declaration patterns
375+
if hasFieldDeclarationPattern(content) {
376+
return true
377+
}
378+
379+
// Check for assignment operators
380+
if hasAssignmentOperators(content) {
381+
return true
382+
}
383+
384+
// Check for function call patterns
385+
if hasFunctionCallPattern(content) {
386+
return true
387+
}
388+
389+
// Check for Go keywords at the start of the line
390+
return hasCodeKeywordPrefix(content)
391+
}
392+
393+
// hasAssignmentOperators checks if content contains Go assignment operators.
394+
func hasAssignmentOperators(content string) bool {
395+
assignmentOps := []string{" := ", " = ", " += ", " -= ", " *= ", " /="}
396+
for _, op := range assignmentOps {
397+
if strings.Contains(content, op) {
398+
return true
399+
}
400+
}
401+
402+
return false
403+
}
404+
405+
// hasCodeKeywordPrefix checks if content starts with Go code keywords.
406+
func hasCodeKeywordPrefix(content string) bool {
407+
// Go declaration keywords
408+
codeKeywords := []string{"func ", "type ", "var ", "const ", "import ", "package ", "struct ", "interface "}
409+
for _, keyword := range codeKeywords {
410+
if strings.HasPrefix(content, keyword) {
411+
return true
412+
}
413+
}
414+
415+
// Control flow keywords
416+
controlFlowKeywords := []string{"if ", "for ", "switch ", "case ", "return ", "break ", "continue ", "defer ", "go ", "select "}
417+
for _, keyword := range controlFlowKeywords {
418+
if strings.HasPrefix(content, keyword) {
419+
return true
420+
}
421+
}
422+
423+
return false
424+
}
425+
426+
// hasFieldDeclarationPattern checks if the content looks like a Go field declaration.
427+
// Examples: "Name string", "Count int", "Enabled *bool", "*Field Type".
428+
func hasFieldDeclarationPattern(content string) bool {
429+
// Look for common Go type names after a potential field name
430+
if typePattern.MatchString(content) {
431+
return true
432+
}
433+
434+
// Also check for pointer field declarations: *Type
435+
if strings.HasPrefix(content, "*") && len(content) > 1 && content[1] != ' ' {
436+
return true
437+
}
438+
439+
return false
440+
}
441+
442+
// hasFunctionCallPattern checks if the content looks like a function call.
443+
// Examples: "someFunc()", "pkg.Method(arg)", "New()".
444+
func hasFunctionCallPattern(content string) bool {
445+
// Simple heuristic: word followed by ( with something inside )
446+
return funcPattern.MatchString(content)
447+
}
448+
449+
// isDocCommentForDeclaration checks if the comment group is a Doc comment for any declaration.
450+
func isDocCommentForDeclaration(group *ast.CommentGroup, file *ast.File) bool {
451+
for _, decl := range file.Decls {
452+
switch d := decl.(type) {
453+
case *ast.GenDecl:
454+
if d.Doc == group {
455+
return true
456+
}
457+
case *ast.FuncDecl:
458+
if d.Doc == group {
459+
return true
460+
}
461+
}
462+
}
463+
464+
return false
465+
}
466+
193467
func extractFieldMarkers(field *ast.Field, results *markers) {
194468
if field == nil || field.Doc == nil {
195469
return
@@ -213,12 +487,20 @@ func extractFieldMarkers(field *ast.Field, results *markers) {
213487
// while supporting declarative validation tags with parentheses and nested markers.
214488
var validMarkerStart = regexp.MustCompile(`^[a-zA-Z]([a-zA-Z0-9:\(\)\"\" ,])+=?`)
215489

490+
// typePattern matches common Go field declaration patterns.
491+
// Examples: "Name string", "Count int", "Enabled *bool".
492+
var typePattern = regexp.MustCompile(`^\w+\s+\*?(string|int|int32|int64|uint|uint32|uint64|bool|float32|float64|byte|rune)\b`)
493+
494+
// funcPattern matches function call patterns.
495+
// Examples: "someFunc()", "pkg.Method(arg)", "New()".
496+
var funcPattern = regexp.MustCompile(`\w+(\.\w+)?\([^)]*\)`)
497+
216498
func extractMarker(comment *ast.Comment) Marker {
217-
if !strings.HasPrefix(comment.Text, "// +") {
499+
if !strings.HasPrefix(comment.Text, markerPrefix) {
218500
return Marker{}
219501
}
220502

221-
markerContent := strings.TrimPrefix(comment.Text, "// +")
503+
markerContent := strings.TrimPrefix(comment.Text, markerPrefix)
222504

223505
// Valid markers must start with an alphabetic character (a-zA-Z).
224506
// This excludes markdown tables (e.g., "// +-------") and other non-marker content,
@@ -460,7 +742,7 @@ type Marker struct {
460742

461743
// String returns the string representation of the marker.
462744
func (m Marker) String() string {
463-
return strings.TrimPrefix(m.RawComment, "// +")
745+
return strings.TrimPrefix(m.RawComment, markerPrefix)
464746
}
465747

466748
// MarkerSet is a set implementation for Markers that uses

0 commit comments

Comments
 (0)