Skip to content

Commit 2ec3df7

Browse files
committed
Detect markers separated by blank lines from type declarations
- Modified marker analyzer to check the previous comment group - Added extractOrphanedMarkers and helper functions - Added constants for marker prefix and separation limits - Added logic to skip commented-out code blocks - Added test cases for List types and non-List types with blank lines Signed-off-by: Sascha Grunert <sgrunert@redhat.com>
1 parent f87b38e commit 2ec3df7

File tree

3 files changed

+476
-6
lines changed

3 files changed

+476
-6
lines changed

pkg/analysis/helpers/markers/analyzer.go

Lines changed: 282 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,21 @@ import (
4545
// is "k8s:item(one: "value", two: "value")=...".
4646
const UnnamedArgument = ""
4747

48+
// maxMarkerSeparationLines is the maximum number of lines that can separate
49+
// a marker comment group from the godoc comment for it to still be considered
50+
// associated with the type declaration.
51+
const maxMarkerSeparationLines = 3
52+
53+
// markerPrefix is the prefix that identifies a comment line as a marker.
54+
const markerPrefix = "// +"
55+
4856
// Markers allows access to markers extracted from the
4957
// go types.
5058
type Markers interface {
5159
// FieldMarkers returns markers associated to the field.
5260
FieldMarkers(*ast.Field) MarkerSet
5361

54-
// StructMarkers returns markers associated to the given sturct.
62+
// StructMarkers returns markers associated to the given struct.
5563
StructMarkers(*ast.StructType) MarkerSet
5664

5765
// TypeMarkers returns markers associated to the given type.
@@ -154,7 +162,17 @@ func run(pass *analysis.Pass) (any, error) {
154162
inspect.Preorder(nodeFilter, func(n ast.Node) {
155163
switch typ := n.(type) {
156164
case *ast.GenDecl:
157-
extractGenDeclMarkers(typ, results)
165+
// Find the file this declaration belongs to
166+
var file *ast.File
167+
168+
for _, f := range pass.Files {
169+
if f.Pos() <= typ.Pos() && typ.End() <= f.End() {
170+
file = f
171+
break
172+
}
173+
}
174+
175+
extractGenDeclMarkers(typ, file, pass.Fset, results)
158176
case *ast.Field:
159177
extractFieldMarkers(typ, results)
160178
}
@@ -163,15 +181,20 @@ func run(pass *analysis.Pass) (any, error) {
163181
return results, nil
164182
}
165183

166-
func extractGenDeclMarkers(typ *ast.GenDecl, results *markers) {
184+
func extractGenDeclMarkers(typ *ast.GenDecl, file *ast.File, fset *token.FileSet, results *markers) {
167185
declMarkers := NewMarkerSet()
168186

187+
// Collect markers from the GenDecl's Doc field (comments directly attached to the declaration)
169188
if typ.Doc != nil {
170189
for _, comment := range typ.Doc.List {
171190
if marker := extractMarker(comment); marker.Identifier != "" {
172191
declMarkers.Insert(marker)
173192
}
174193
}
194+
195+
// Also collect markers from the comment group immediately before the godoc comment
196+
// if separated by a blank line.
197+
extractOrphanedMarkers(typ.Doc, file, fset, declMarkers)
175198
}
176199

177200
if len(typ.Specs) == 0 {
@@ -190,6 +213,251 @@ func extractGenDeclMarkers(typ *ast.GenDecl, results *markers) {
190213
}
191214
}
192215

216+
// extractOrphanedMarkers finds markers in the comment group immediately before the godoc comment
217+
// that are separated by a blank line. Only the immediately preceding comment group is checked,
218+
// and it must be within maxMarkerSeparationLines lines of the godoc comment.
219+
//
220+
// This handles the "second level comment bug" (issue #53) where markers are separated from type
221+
// declarations by blank lines, which commonly occurs in real-world Kubernetes API code.
222+
//
223+
// Example scenario this handles:
224+
//
225+
// // +kubebuilder:object:root=true
226+
// // +kubebuilder:subresource:status
227+
//
228+
// // ClusterList contains a list of Cluster.
229+
// type ClusterList struct {
230+
// metav1.TypeMeta `json:",inline"`
231+
// metav1.ListMeta `json:"metadata,omitempty"`
232+
// Items []Cluster `json:"items"`
233+
// }
234+
//
235+
// The markers will be detected even though separated by a blank line from the godoc comment.
236+
// Note: Only multi-line marker groups are considered orphaned. Single-line markers are assumed
237+
// to be regular Doc comments already handled by the AST parser.
238+
func extractOrphanedMarkers(docGroup *ast.CommentGroup, file *ast.File, fset *token.FileSet, declMarkers MarkerSet) {
239+
if file == nil || fset == nil {
240+
return
241+
}
242+
243+
prevGroup := findPreviousCommentGroup(docGroup, file)
244+
if prevGroup == nil {
245+
return
246+
}
247+
248+
if !isValidOrphanedMarkerGroup(prevGroup, docGroup, file, fset) {
249+
return
250+
}
251+
252+
// Extract markers from the previous comment group
253+
for _, comment := range prevGroup.List {
254+
if marker := extractMarker(comment); marker.Identifier != "" {
255+
declMarkers.Insert(marker)
256+
}
257+
}
258+
}
259+
260+
// findPreviousCommentGroup finds the comment group immediately before the given docGroup.
261+
func findPreviousCommentGroup(docGroup *ast.CommentGroup, file *ast.File) *ast.CommentGroup {
262+
for i, cg := range file.Comments {
263+
if cg == docGroup && i > 0 {
264+
return file.Comments[i-1]
265+
}
266+
}
267+
268+
return nil
269+
}
270+
271+
// isValidOrphanedMarkerGroup checks if the previous comment group is a valid orphaned marker group.
272+
func isValidOrphanedMarkerGroup(prevGroup, docGroup *ast.CommentGroup, file *ast.File, fset *token.FileSet) bool {
273+
// Check if the comment groups are properly separated
274+
if !isProperlySeparated(prevGroup, docGroup, fset) {
275+
return false
276+
}
277+
278+
// Only extract if the comment group has multiple lines and contains markers
279+
if !containsMultilineMarkers(prevGroup) {
280+
return false
281+
}
282+
283+
// Check if this previous comment group is a Doc comment for another declaration
284+
return !isDocCommentForDeclaration(prevGroup, file)
285+
}
286+
287+
// isProperlySeparated checks if comment groups are separated by at least one blank line.
288+
func isProperlySeparated(prevGroup, docGroup *ast.CommentGroup, fset *token.FileSet) bool {
289+
docStartLine := fset.Position(docGroup.Pos()).Line
290+
prevEndLine := fset.Position(prevGroup.End()).Line
291+
lineDiff := docStartLine - prevEndLine
292+
293+
// lineDiff > 1: ensures at least one blank line
294+
// lineDiff <= maxMarkerSeparationLines: ensures not too far apart
295+
return lineDiff > 1 && lineDiff <= maxMarkerSeparationLines
296+
}
297+
298+
// containsMultilineMarkers checks if a comment group has multiple lines and contains at least one marker.
299+
// It also ensures the comment group doesn't contain commented-out code.
300+
//
301+
// Single-line marker groups are intentionally rejected because:
302+
// 1. Single-line comments immediately before a type declaration (without a blank line)
303+
// are already captured as Doc comments by the Go AST parser and processed normally.
304+
// 2. A single marker line separated by a blank line from documentation is ambiguous -
305+
// it's unclear whether it's an orphaned marker or misplaced documentation.
306+
// 3. Multi-line marker groups provide stronger signal that they are intentional marker
307+
// blocks that got separated from the type declaration by formatting.
308+
//
309+
// Example of what IS detected (multi-line orphaned markers):
310+
//
311+
// // +kubebuilder:object:root=true
312+
// // +kubebuilder:subresource:status
313+
//
314+
// // MyType does something
315+
// type MyType struct {}
316+
//
317+
// Example of what is NOT detected (single-line, already handled as Doc comment):
318+
//
319+
// // +kubebuilder:object:root=true
320+
// // MyType does something
321+
// type MyType struct {}
322+
func containsMultilineMarkers(group *ast.CommentGroup) bool {
323+
// Require at least 2 lines to distinguish from normal Doc comments
324+
if len(group.List) <= 1 {
325+
return false
326+
}
327+
328+
hasMarker := false
329+
330+
for _, comment := range group.List {
331+
text := comment.Text
332+
if strings.HasPrefix(text, markerPrefix) {
333+
hasMarker = true
334+
} else if looksLikeCommentedCode(text) {
335+
// Skip comment groups that contain commented-out code
336+
return false
337+
}
338+
}
339+
340+
return hasMarker
341+
}
342+
343+
// looksLikeCommentedCode checks if a comment line looks like commented-out code.
344+
func looksLikeCommentedCode(text string) bool {
345+
content := prepareContentForAnalysis(text)
346+
347+
// Empty lines or lines starting with markers are not code
348+
if content == "" || strings.HasPrefix(content, "+") {
349+
return false
350+
}
351+
352+
return hasCodeIndicators(content)
353+
}
354+
355+
// prepareContentForAnalysis strips comment prefixes and normalizes the content.
356+
func prepareContentForAnalysis(text string) string {
357+
content := strings.TrimPrefix(text, "//")
358+
return strings.TrimSpace(content)
359+
}
360+
361+
// hasCodeIndicators checks if content contains patterns that indicate Go code.
362+
func hasCodeIndicators(content string) bool {
363+
// Check for struct tags (backticks are a strong signal of Go code)
364+
if strings.Contains(content, "`") {
365+
return true
366+
}
367+
368+
// Check for field declaration patterns
369+
if hasFieldDeclarationPattern(content) {
370+
return true
371+
}
372+
373+
// Check for assignment operators
374+
if hasAssignmentOperators(content) {
375+
return true
376+
}
377+
378+
// Check for function call patterns
379+
if hasFunctionCallPattern(content) {
380+
return true
381+
}
382+
383+
// Check for Go keywords at the start of the line
384+
return hasCodeKeywordPrefix(content)
385+
}
386+
387+
// hasAssignmentOperators checks if content contains Go assignment operators.
388+
func hasAssignmentOperators(content string) bool {
389+
assignmentOps := []string{" := ", " = ", " += ", " -= ", " *= ", " /="}
390+
for _, op := range assignmentOps {
391+
if strings.Contains(content, op) {
392+
return true
393+
}
394+
}
395+
396+
return false
397+
}
398+
399+
// hasCodeKeywordPrefix checks if content starts with Go code keywords.
400+
func hasCodeKeywordPrefix(content string) bool {
401+
// Go declaration keywords
402+
codeKeywords := []string{"func ", "type ", "var ", "const ", "import ", "package ", "struct ", "interface "}
403+
for _, keyword := range codeKeywords {
404+
if strings.HasPrefix(content, keyword) {
405+
return true
406+
}
407+
}
408+
409+
// Control flow keywords
410+
controlFlowKeywords := []string{"if ", "for ", "switch ", "case ", "return ", "break ", "continue ", "defer ", "go ", "select "}
411+
for _, keyword := range controlFlowKeywords {
412+
if strings.HasPrefix(content, keyword) {
413+
return true
414+
}
415+
}
416+
417+
return false
418+
}
419+
420+
// hasFieldDeclarationPattern checks if the content looks like a Go field declaration.
421+
// Examples: "Name string", "Count int", "Enabled *bool", "*Field Type".
422+
func hasFieldDeclarationPattern(content string) bool {
423+
// Look for common Go type names after a potential field name
424+
if typePattern.MatchString(content) {
425+
return true
426+
}
427+
428+
// Also check for pointer field declarations: *Type
429+
if strings.HasPrefix(content, "*") && len(content) > 1 && content[1] != ' ' {
430+
return true
431+
}
432+
433+
return false
434+
}
435+
436+
// hasFunctionCallPattern checks if the content looks like a function call.
437+
// Examples: "someFunc()", "pkg.Method(arg)", "New()".
438+
func hasFunctionCallPattern(content string) bool {
439+
// Simple heuristic: word followed by ( with something inside )
440+
return funcPattern.MatchString(content)
441+
}
442+
443+
// isDocCommentForDeclaration checks if the comment group is a Doc comment for any declaration.
444+
func isDocCommentForDeclaration(group *ast.CommentGroup, file *ast.File) bool {
445+
for _, decl := range file.Decls {
446+
switch d := decl.(type) {
447+
case *ast.GenDecl:
448+
if d.Doc == group {
449+
return true
450+
}
451+
case *ast.FuncDecl:
452+
if d.Doc == group {
453+
return true
454+
}
455+
}
456+
}
457+
458+
return false
459+
}
460+
193461
func extractFieldMarkers(field *ast.Field, results *markers) {
194462
if field == nil || field.Doc == nil {
195463
return
@@ -213,12 +481,20 @@ func extractFieldMarkers(field *ast.Field, results *markers) {
213481
// while supporting declarative validation tags with parentheses and nested markers.
214482
var validMarkerStart = regexp.MustCompile(`^[a-zA-Z]([a-zA-Z0-9:\(\)\"\" ,])+=?`)
215483

484+
// typePattern matches common Go field declaration patterns.
485+
// Examples: "Name string", "Count int", "Enabled *bool".
486+
var typePattern = regexp.MustCompile(`^\w+\s+\*?(string|int|int32|int64|uint|uint32|uint64|bool|float32|float64|byte|rune)\b`)
487+
488+
// funcPattern matches function call patterns.
489+
// Examples: "someFunc()", "pkg.Method(arg)", "New()".
490+
var funcPattern = regexp.MustCompile(`\w+(\.\w+)?\([^)]*\)`)
491+
216492
func extractMarker(comment *ast.Comment) Marker {
217-
if !strings.HasPrefix(comment.Text, "// +") {
493+
if !strings.HasPrefix(comment.Text, markerPrefix) {
218494
return Marker{}
219495
}
220496

221-
markerContent := strings.TrimPrefix(comment.Text, "// +")
497+
markerContent := strings.TrimPrefix(comment.Text, markerPrefix)
222498

223499
// Valid markers must start with an alphabetic character (a-zA-Z).
224500
// This excludes markdown tables (e.g., "// +-------") and other non-marker content,
@@ -460,7 +736,7 @@ type Marker struct {
460736

461737
// String returns the string representation of the marker.
462738
func (m Marker) String() string {
463-
return strings.TrimPrefix(m.RawComment, "// +")
739+
return strings.TrimPrefix(m.RawComment, markerPrefix)
464740
}
465741

466742
// MarkerSet is a set implementation for Markers that uses

0 commit comments

Comments
 (0)