Skip to content

Commit 847529f

Browse files
committed
Detect markers separated by blank lines from type declarations
- Modified marker analyzer to check the previous comment group - Added extractOrphanedMarkers and helper functions - Added constants for marker prefix and separation limits - Added logic to skip commented-out code blocks - Added test cases for List types and non-List types with blank lines Signed-off-by: Sascha Grunert <sgrunert@redhat.com>
1 parent ab50bf5 commit 847529f

File tree

3 files changed

+501
-6
lines changed

3 files changed

+501
-6
lines changed

pkg/analysis/helpers/markers/analyzer.go

Lines changed: 286 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,21 @@ import (
4545
// is "k8s:item(one: "value", two: "value")=...".
4646
const UnnamedArgument = ""
4747

48+
// maxMarkerSeparationLines is the maximum number of lines that can separate
49+
// a marker comment group from the godoc comment for it to still be considered
50+
// associated with the type declaration.
51+
const maxMarkerSeparationLines = 3
52+
53+
// markerPrefix is the prefix that identifies a comment line as a marker.
54+
const markerPrefix = "// +"
55+
4856
// Markers allows access to markers extracted from the
4957
// go types.
5058
type Markers interface {
5159
// FieldMarkers returns markers associated to the field.
5260
FieldMarkers(*ast.Field) MarkerSet
5361

54-
// StructMarkers returns markers associated to the given sturct.
62+
// StructMarkers returns markers associated to the given struct.
5563
StructMarkers(*ast.StructType) MarkerSet
5664

5765
// TypeMarkers returns markers associated to the given type.
@@ -154,7 +162,17 @@ func run(pass *analysis.Pass) (any, error) {
154162
inspect.Preorder(nodeFilter, func(n ast.Node) {
155163
switch typ := n.(type) {
156164
case *ast.GenDecl:
157-
extractGenDeclMarkers(typ, results)
165+
// Find the file this declaration belongs to
166+
var file *ast.File
167+
168+
for _, f := range pass.Files {
169+
if f.Pos() <= typ.Pos() && typ.End() <= f.End() {
170+
file = f
171+
break
172+
}
173+
}
174+
175+
extractGenDeclMarkers(typ, file, pass.Fset, results)
158176
case *ast.Field:
159177
extractFieldMarkers(typ, results)
160178
}
@@ -163,15 +181,20 @@ func run(pass *analysis.Pass) (any, error) {
163181
return results, nil
164182
}
165183

166-
func extractGenDeclMarkers(typ *ast.GenDecl, results *markers) {
184+
func extractGenDeclMarkers(typ *ast.GenDecl, file *ast.File, fset *token.FileSet, results *markers) {
167185
declMarkers := NewMarkerSet()
168186

187+
// Collect markers from the GenDecl's Doc field (comments directly attached to the declaration)
169188
if typ.Doc != nil {
170189
for _, comment := range typ.Doc.List {
171190
if marker := extractMarker(comment); marker.Identifier != "" {
172191
declMarkers.Insert(marker)
173192
}
174193
}
194+
195+
// Also collect markers from the comment group immediately before the godoc comment
196+
// if separated by a blank line.
197+
extractOrphanedMarkers(typ.Doc, file, fset, declMarkers)
175198
}
176199

177200
if len(typ.Specs) == 0 {
@@ -190,6 +213,255 @@ func extractGenDeclMarkers(typ *ast.GenDecl, results *markers) {
190213
}
191214
}
192215

216+
// extractOrphanedMarkers finds markers in the comment group immediately before the godoc comment
217+
// that are separated by a blank line. Only the immediately preceding comment group is checked,
218+
// and it must be within maxMarkerSeparationLines lines of the godoc comment.
219+
//
220+
// This handles the "second level comment bug" (issue #53) where markers are separated from type
221+
// declarations by blank lines, which commonly occurs in real-world Kubernetes API code.
222+
//
223+
// Example scenario this handles:
224+
//
225+
// // +kubebuilder:object:root=true
226+
// // +kubebuilder:subresource:status
227+
//
228+
// // ClusterList contains a list of Cluster.
229+
// type ClusterList struct {
230+
// metav1.TypeMeta `json:",inline"`
231+
// metav1.ListMeta `json:"metadata,omitempty"`
232+
// Items []Cluster `json:"items"`
233+
// }
234+
//
235+
// The markers will be detected even though separated by a blank line from the godoc comment.
236+
// Note: Only multi-line marker groups are considered orphaned. Single-line markers are assumed
237+
// to be regular Doc comments already handled by the AST parser.
238+
func extractOrphanedMarkers(docGroup *ast.CommentGroup, file *ast.File, fset *token.FileSet, declMarkers MarkerSet) {
239+
if file == nil || fset == nil {
240+
return
241+
}
242+
243+
prevGroup := findPreviousCommentGroup(docGroup, file)
244+
if prevGroup == nil {
245+
return
246+
}
247+
248+
if !isValidOrphanedMarkerGroup(prevGroup, docGroup, file, fset) {
249+
return
250+
}
251+
252+
// Extract markers from the previous comment group
253+
for _, comment := range prevGroup.List {
254+
if marker := extractMarker(comment); marker.Identifier != "" {
255+
declMarkers.Insert(marker)
256+
}
257+
}
258+
}
259+
260+
// findPreviousCommentGroup finds the comment group immediately before the given docGroup.
261+
func findPreviousCommentGroup(docGroup *ast.CommentGroup, file *ast.File) *ast.CommentGroup {
262+
for i, cg := range file.Comments {
263+
if cg == docGroup && i > 0 {
264+
return file.Comments[i-1]
265+
}
266+
}
267+
268+
return nil
269+
}
270+
271+
// isValidOrphanedMarkerGroup checks if the previous comment group is a valid orphaned marker group.
272+
func isValidOrphanedMarkerGroup(prevGroup, docGroup *ast.CommentGroup, file *ast.File, fset *token.FileSet) bool {
273+
// Check if the comment groups are properly separated
274+
if !isProperlySeparated(prevGroup, docGroup, fset) {
275+
return false
276+
}
277+
278+
// Only extract if the comment group has multiple lines and contains markers
279+
if !containsMultilineMarkers(prevGroup) {
280+
return false
281+
}
282+
283+
// Check if this previous comment group is a Doc comment for another declaration
284+
return !isDocCommentForDeclaration(prevGroup, file)
285+
}
286+
287+
// isProperlySeparated checks if comment groups are separated by at least one blank line.
288+
func isProperlySeparated(prevGroup, docGroup *ast.CommentGroup, fset *token.FileSet) bool {
289+
docStartLine := fset.Position(docGroup.Pos()).Line
290+
prevEndLine := fset.Position(prevGroup.End()).Line
291+
lineDiff := docStartLine - prevEndLine
292+
293+
// lineDiff > 1: ensures at least one blank line
294+
// lineDiff <= maxMarkerSeparationLines: ensures not too far apart
295+
return lineDiff > 1 && lineDiff <= maxMarkerSeparationLines
296+
}
297+
298+
// containsMultilineMarkers checks if a comment group contains at least one marker.
299+
// It also ensures the comment group doesn't contain commented-out code.
300+
//
301+
// This function detects both single-line and multi-line marker groups that are
302+
// separated from type declarations by blank lines (orphaned markers).
303+
//
304+
// Single-line comments immediately before a type declaration (without a blank line)
305+
// are already captured as Doc comments by the Go AST parser and processed normally.
306+
//
307+
// Example of what IS detected (orphaned markers separated by blank line):
308+
//
309+
// // +kubebuilder:object:root=true
310+
//
311+
// // MyType does something
312+
// type MyType struct {}
313+
//
314+
// Or multi-line:
315+
//
316+
// // +kubebuilder:object:root=true
317+
// // +kubebuilder:subresource:status
318+
//
319+
// // MyType does something
320+
// type MyType struct {}
321+
//
322+
// Example of what is NOT detected (marker without blank line, already handled as Doc comment):
323+
//
324+
// // +kubebuilder:object:root=true
325+
// // MyType does something
326+
// type MyType struct {}
327+
func containsMultilineMarkers(group *ast.CommentGroup) bool {
328+
if len(group.List) == 0 {
329+
return false
330+
}
331+
332+
hasMarker := false
333+
334+
for _, comment := range group.List {
335+
text := comment.Text
336+
if strings.HasPrefix(text, markerPrefix) {
337+
hasMarker = true
338+
} else if looksLikeCommentedCode(text) {
339+
// Skip comment groups that contain commented-out code
340+
return false
341+
}
342+
}
343+
344+
return hasMarker
345+
}
346+
347+
// looksLikeCommentedCode checks if a comment line looks like commented-out code.
348+
func looksLikeCommentedCode(text string) bool {
349+
content := prepareContentForAnalysis(text)
350+
351+
// Empty lines or lines starting with markers are not code
352+
if content == "" || strings.HasPrefix(content, "+") {
353+
return false
354+
}
355+
356+
return hasCodeIndicators(content)
357+
}
358+
359+
// prepareContentForAnalysis strips comment prefixes and normalizes the content.
360+
func prepareContentForAnalysis(text string) string {
361+
content := strings.TrimPrefix(text, "//")
362+
return strings.TrimSpace(content)
363+
}
364+
365+
// hasCodeIndicators checks if content contains patterns that indicate Go code.
366+
func hasCodeIndicators(content string) bool {
367+
// Check for struct tags (backticks are a strong signal of Go code)
368+
if strings.Contains(content, "`") {
369+
return true
370+
}
371+
372+
// Check for field declaration patterns
373+
if hasFieldDeclarationPattern(content) {
374+
return true
375+
}
376+
377+
// Check for assignment operators
378+
if hasAssignmentOperators(content) {
379+
return true
380+
}
381+
382+
// Check for function call patterns
383+
if hasFunctionCallPattern(content) {
384+
return true
385+
}
386+
387+
// Check for Go keywords at the start of the line
388+
return hasCodeKeywordPrefix(content)
389+
}
390+
391+
// hasAssignmentOperators checks if content contains Go assignment operators.
392+
func hasAssignmentOperators(content string) bool {
393+
assignmentOps := []string{" := ", " = ", " += ", " -= ", " *= ", " /="}
394+
for _, op := range assignmentOps {
395+
if strings.Contains(content, op) {
396+
return true
397+
}
398+
}
399+
400+
return false
401+
}
402+
403+
// hasCodeKeywordPrefix checks if content starts with Go code keywords.
404+
func hasCodeKeywordPrefix(content string) bool {
405+
// Go declaration keywords
406+
codeKeywords := []string{"func ", "type ", "var ", "const ", "import ", "package ", "struct ", "interface "}
407+
for _, keyword := range codeKeywords {
408+
if strings.HasPrefix(content, keyword) {
409+
return true
410+
}
411+
}
412+
413+
// Control flow keywords
414+
controlFlowKeywords := []string{"if ", "for ", "switch ", "case ", "return ", "break ", "continue ", "defer ", "go ", "select "}
415+
for _, keyword := range controlFlowKeywords {
416+
if strings.HasPrefix(content, keyword) {
417+
return true
418+
}
419+
}
420+
421+
return false
422+
}
423+
424+
// hasFieldDeclarationPattern checks if the content looks like a Go field declaration.
425+
// Examples: "Name string", "Count int", "Enabled *bool", "*Field Type".
426+
func hasFieldDeclarationPattern(content string) bool {
427+
// Look for common Go type names after a potential field name
428+
if typePattern.MatchString(content) {
429+
return true
430+
}
431+
432+
// Also check for pointer field declarations: *Type
433+
if strings.HasPrefix(content, "*") && len(content) > 1 && content[1] != ' ' {
434+
return true
435+
}
436+
437+
return false
438+
}
439+
440+
// hasFunctionCallPattern checks if the content looks like a function call.
441+
// Examples: "someFunc()", "pkg.Method(arg)", "New()".
442+
func hasFunctionCallPattern(content string) bool {
443+
// Simple heuristic: word followed by ( with something inside )
444+
return funcPattern.MatchString(content)
445+
}
446+
447+
// isDocCommentForDeclaration checks if the comment group is a Doc comment for any declaration.
448+
func isDocCommentForDeclaration(group *ast.CommentGroup, file *ast.File) bool {
449+
for _, decl := range file.Decls {
450+
switch d := decl.(type) {
451+
case *ast.GenDecl:
452+
if d.Doc == group {
453+
return true
454+
}
455+
case *ast.FuncDecl:
456+
if d.Doc == group {
457+
return true
458+
}
459+
}
460+
}
461+
462+
return false
463+
}
464+
193465
func extractFieldMarkers(field *ast.Field, results *markers) {
194466
if field == nil || field.Doc == nil {
195467
return
@@ -213,12 +485,20 @@ func extractFieldMarkers(field *ast.Field, results *markers) {
213485
// while supporting declarative validation tags with parentheses and nested markers.
214486
var validMarkerStart = regexp.MustCompile(`^[a-zA-Z]([a-zA-Z0-9:\(\)\"\" ,])+=?`)
215487

488+
// typePattern matches common Go field declaration patterns.
489+
// Examples: "Name string", "Count int", "Enabled *bool".
490+
var typePattern = regexp.MustCompile(`^\w+\s+\*?(string|int|int32|int64|uint|uint32|uint64|bool|float32|float64|byte|rune)\b`)
491+
492+
// funcPattern matches function call patterns.
493+
// Examples: "someFunc()", "pkg.Method(arg)", "New()".
494+
var funcPattern = regexp.MustCompile(`\w+(\.\w+)?\([^)]*\)`)
495+
216496
func extractMarker(comment *ast.Comment) Marker {
217-
if !strings.HasPrefix(comment.Text, "// +") {
497+
if !strings.HasPrefix(comment.Text, markerPrefix) {
218498
return Marker{}
219499
}
220500

221-
markerContent := strings.TrimPrefix(comment.Text, "// +")
501+
markerContent := strings.TrimPrefix(comment.Text, markerPrefix)
222502

223503
// Valid markers must start with an alphabetic character (a-zA-Z).
224504
// This excludes markdown tables (e.g., "// +-------") and other non-marker content,
@@ -460,7 +740,7 @@ type Marker struct {
460740

461741
// String returns the string representation of the marker.
462742
func (m Marker) String() string {
463-
return strings.TrimPrefix(m.RawComment, "// +")
743+
return strings.TrimPrefix(m.RawComment, markerPrefix)
464744
}
465745

466746
// MarkerSet is a set implementation for Markers that uses

0 commit comments

Comments
 (0)