Skip to content

Commit 321c9a1

Browse files
authored
Update table column integrity linter to avoid false positives (#58294)
1 parent e6a2523 commit 321c9a1

File tree

2 files changed

+222
-5
lines changed

2 files changed

+222
-5
lines changed

src/content-linter/lib/linting-rules/table-column-integrity.ts

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ const TABLE_ROW_REGEX = /^\s*\|.*\|\s*$/
1010
// Regex to detect table separator rows (contains only |, :, -, and whitespace)
1111
const TABLE_SEPARATOR_REGEX = /^\s*\|[\s\-:|\s]*\|\s*$/
1212
// Regex to detect Liquid-only cells (whitespace, liquid tag, whitespace)
13-
const LIQUID_ONLY_CELL_REGEX = /^\s*{%\s*(ifversion|else|endif|elsif).*%}\s*$/
14-
13+
const LIQUID_ONLY_CELL_REGEX = /^\s*{%\s*(ifversion|else|endif|elsif|for|endfor).*%}\s*$/
14+
// Regex to use for splitting on non-escaped pipes only
15+
const NON_ESCAPED_PIPE_REGEX = /(?<!\\)\|/
1516
/**
1617
* Counts the number of columns in a table row by splitting on | and handling edge cases
1718
*/
@@ -24,8 +25,9 @@ function countColumns(row: string): number {
2425
return 0
2526
}
2627

27-
// Split by | and filter out empty cells at start/end (from leading/trailing |)
28-
const cells = trimmed.split('|')
28+
// Split by '|' (but ignore escaped '\|' as these are not true separators)
29+
// Filter out empty cells at start/end (from leading/trailing |)
30+
const cells = trimmed.split(NON_ESCAPED_PIPE_REGEX)
2931

3032
// Remove first and last elements if they're empty (from leading/trailing |)
3133
if (cells.length > 0 && cells[0].trim() === '') {
@@ -45,7 +47,7 @@ function isLiquidOnlyRow(row: string): boolean {
4547
const trimmed = row.trim()
4648
if (!trimmed.includes('|')) return false
4749

48-
const cells = trimmed.split('|')
50+
const cells = trimmed.split(NON_ESCAPED_PIPE_REGEX)
4951
// Remove empty cells from leading/trailing |
5052
const filteredCells = cells.filter((cell, index) => {
5153
if (index === 0 && cell.trim() === '') return false
@@ -72,10 +74,22 @@ export const tableColumnIntegrity = {
7274

7375
const lines = params.lines
7476
let inTable = false
77+
let inCodeFence = false
7578
let expectedColumnCount: number | null = null
7679

7780
for (let i = 0; i < lines.length; i++) {
7881
const line = lines[i]
82+
83+
// Toggle code fence state
84+
if (line.trim().startsWith('```')) {
85+
inCodeFence = !inCodeFence
86+
continue
87+
}
88+
89+
if (inCodeFence) {
90+
continue
91+
}
92+
7993
const isTableRow = TABLE_ROW_REGEX.test(line)
8094
const isSeparatorRow = TABLE_SEPARATOR_REGEX.test(line)
8195

src/content-linter/tests/unit/table-column-integrity-simple.ts

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,207 @@ describe(tableColumnIntegrity.names.join(' - '), () => {
6060
const errors = result.markdown
6161
expect(errors.length).toBe(0)
6262
})
63+
64+
test('Escaped pipes (\\|) are not counted as column separators', async () => {
65+
const markdown = [
66+
'| Command | Description |',
67+
'|---------|-------------|',
68+
'| `git log --oneline \\| head` | Shows recent commits |',
69+
'| `echo "hello \\| world"` | Prints text with pipe |',
70+
].join('\n')
71+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
72+
const errors = result.markdown
73+
expect(errors.length).toBe(0)
74+
})
75+
76+
test('Escaped pipes mixed with real separators work correctly', async () => {
77+
const markdown = [
78+
'| Code | Output | Notes |',
79+
'|------|--------|-------|',
80+
'| `echo "a \\| b" \\| wc` | 1 | Pipe in string and command |',
81+
'| `grep "x" file` | matches | No pipes here |',
82+
].join('\n')
83+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
84+
const errors = result.markdown
85+
expect(errors.length).toBe(0)
86+
})
87+
88+
test('Liquid for/endfor statements are ignored in table rows', async () => {
89+
const markdown = [
90+
'| Item | Details |',
91+
'|------|---------|',
92+
'| {% for item in collection %} |',
93+
'| Product A | Available |',
94+
'| Product B | Sold out |',
95+
'| {% endfor %} |',
96+
].join('\n')
97+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
98+
const errors = result.markdown
99+
expect(errors.length).toBe(0)
100+
})
101+
102+
test('Mixed liquid statements (ifversion, for, endif) are ignored', async () => {
103+
const markdown = [
104+
'| Feature | Status | Version |',
105+
'|---------|--------|---------|',
106+
'| {% ifversion ghes %} |',
107+
'| {% for version in site.data.versions %} |',
108+
'| Basic | Active | 1.0 |',
109+
'| {% endfor %} |',
110+
'| {% endif %} |',
111+
'| Advanced | Beta | 2.0 |',
112+
].join('\n')
113+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
114+
const errors = result.markdown
115+
expect(errors.length).toBe(0)
116+
})
117+
118+
test('Tables inside code fences are ignored', async () => {
119+
const markdown = [
120+
'Here is some example markdown:',
121+
'',
122+
'```markdown',
123+
'| Name | Age |',
124+
'|------|-----|',
125+
'| Alice | 25 | Extra column that would normally cause error |',
126+
'| Bob |',
127+
'```',
128+
'',
129+
'But this real table should be validated:',
130+
'',
131+
'| Product | Price |',
132+
'|---------|-------|',
133+
'| Widget | $10 |',
134+
].join('\n')
135+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
136+
const errors = result.markdown
137+
expect(errors.length).toBe(0)
138+
})
139+
140+
test('Tables inside different code fence types are ignored', async () => {
141+
const markdown = [
142+
'```',
143+
'| Malformed | Table |',
144+
'|-----------|-------|',
145+
'| Too | Many | Columns | Here |',
146+
'```',
147+
'',
148+
'```text',
149+
'| Another | Bad |',
150+
'|---------|-----|',
151+
'| Missing |',
152+
'```',
153+
'',
154+
'```yaml',
155+
'| YAML | Example |',
156+
'|------|---------|',
157+
'| key: | value | extra |',
158+
'```',
159+
].join('\n')
160+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
161+
const errors = result.markdown
162+
expect(errors.length).toBe(0)
163+
})
164+
165+
test('File paths with pipes are handled correctly (regression test)', async () => {
166+
// This test catches the specific issue from content/actions/tutorials/build-and-test-code/python.md
167+
// where the old regex /[^\\]\|/ was consuming characters before pipes and miscounting columns
168+
const markdown = [
169+
'| Directory | Ubuntu | macOS |',
170+
'|-----------|--------|-------|',
171+
'|**Tool Cache Directory** |`/opt/hostedtoolcache/*`|`/Users/runner/hostedtoolcache/*`|',
172+
'|**Python Tool Cache**|`/opt/hostedtoolcache/Python/*`|`/Users/runner/hostedtoolcache/Python/*`|',
173+
'|**PyPy Tool Cache**|`/opt/hostedtoolcache/PyPy/*`|`/Users/runner/hostedtoolcache/PyPy/*`|',
174+
].join('\n')
175+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
176+
const errors = result.markdown
177+
expect(errors.length).toBe(0)
178+
})
179+
180+
test('Complex file paths with multiple characters before pipes', async () => {
181+
// Additional test to ensure the lookbehind regex works with various characters before pipes
182+
const markdown = [
183+
'| Pattern | Linux Path | Windows Path |',
184+
'|---------|------------|--------------|',
185+
'| Cache | `/home/user/.cache/*` | `C:\\Users\\user\\AppData\\*` |',
186+
'| Logs | `/var/log/app/*` | `C:\\ProgramData\\logs\\*` |',
187+
'| Config | `/etc/myapp/*` | `C:\\Program Files\\MyApp\\*` |',
188+
].join('\n')
189+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
190+
const errors = result.markdown
191+
expect(errors.length).toBe(0)
192+
})
193+
194+
test('Code fence spanning multiple lines with tables inside', async () => {
195+
const markdown = [
196+
'Here is some documentation:',
197+
'',
198+
'```markdown',
199+
'# Example Document',
200+
'',
201+
'| Bad | Table |',
202+
'|-----|-------|',
203+
'| Missing | column | here | extra |',
204+
'| Another | bad | row |',
205+
'',
206+
'More content here',
207+
'```',
208+
'',
209+
'This real table should be validated:',
210+
'',
211+
'| Good | Table |',
212+
'|------|-------|',
213+
'| Valid | Row |',
214+
].join('\n')
215+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
216+
const errors = result.markdown
217+
expect(errors.length).toBe(0)
218+
})
219+
220+
test('Multiple code fences with tables between them', async () => {
221+
const markdown = [
222+
'```js',
223+
'| Bad | JS | Table |',
224+
'|-----|----|----|',
225+
'| Extra | column | here | bad |',
226+
'```',
227+
'',
228+
'Real table that should be checked:',
229+
'',
230+
'| Name | Status |',
231+
'|------|--------|',
232+
'| Test | Pass |',
233+
'',
234+
'```bash',
235+
'| Command | Output |',
236+
'|---------|--------|',
237+
'| ls | file1.txt | file2.txt | extra |',
238+
'```',
239+
].join('\n')
240+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
241+
const errors = result.markdown
242+
expect(errors.length).toBe(0)
243+
})
244+
245+
test('Code fence with language identifier', async () => {
246+
const markdown = [
247+
'```typescript',
248+
'const badTable = `',
249+
'| Name | Age |',
250+
'|------|-----|',
251+
'| Alice | 25 | Extra |',
252+
'`',
253+
'```',
254+
'',
255+
'```yaml',
256+
'table:',
257+
' - name: Bad',
258+
' - age: 30',
259+
' - extra: column',
260+
'```',
261+
].join('\n')
262+
const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
263+
const errors = result.markdown
264+
expect(errors.length).toBe(0)
265+
})
63266
})

0 commit comments

Comments
 (0)