Update table column integrity linter to avoid false positives (#58294)

sarahs · web-flow · commit 321c9a143b40 · 2025-10-31T16:19:47.000Z
diff --git a/src/content-linter/lib/linting-rules/table-column-integrity.ts b/src/content-linter/lib/linting-rules/table-column-integrity.ts
@@ -10,8 +10,9 @@ const TABLE_ROW_REGEX = /^\s*\|.*\|\s*$/
 // Regex to detect table separator rows (contains only |, :, -, and whitespace)
 const TABLE_SEPARATOR_REGEX = /^\s*\|[\s\-:|\s]*\|\s*$/
 // Regex to detect Liquid-only cells (whitespace, liquid tag, whitespace)
-const LIQUID_ONLY_CELL_REGEX = /^\s*{%\s*(ifversion|else|endif|elsif).*%}\s*$/
-
+const LIQUID_ONLY_CELL_REGEX = /^\s*{%\s*(ifversion|else|endif|elsif|for|endfor).*%}\s*$/
+// Regex for splitting on non-escaped pipes only: the lookbehind rejects any '|' directly preceded by a backslash
+const NON_ESCAPED_PIPE_REGEX = /(?<!\\)\|/
 /**
  * Counts the number of columns in a table row by splitting on | and handling edge cases
  */
@@ -24,8 +25,9 @@ function countColumns(row: string): number {
     return 0
   }
 
-  // Split by | and filter out empty cells at start/end (from leading/trailing |)
-  const cells = trimmed.split('|')
+  // Split on '|', ignoring escaped '\|' since those are cell content rather than separators.
+  // The leading/trailing '|' produce empty first/last cells, which are removed below.
+  const cells = trimmed.split(NON_ESCAPED_PIPE_REGEX)
 
   // Remove first and last elements if they're empty (from leading/trailing |)
   if (cells.length > 0 && cells[0].trim() === '') {
@@ -45,7 +47,7 @@ function isLiquidOnlyRow(row: string): boolean {
   const trimmed = row.trim()
   if (!trimmed.includes('|')) return false
 
-  const cells = trimmed.split('|')
+  const cells = trimmed.split(NON_ESCAPED_PIPE_REGEX)
   // Remove empty cells from leading/trailing |
   const filteredCells = cells.filter((cell, index) => {
     if (index === 0 && cell.trim() === '') return false
@@ -72,10 +74,22 @@ export const tableColumnIntegrity = {
 
     const lines = params.lines
     let inTable = false
+    let inCodeFence = false
     let expectedColumnCount: number | null = null
 
     for (let i = 0; i < lines.length; i++) {
       const line = lines[i]
+
+      // Toggle code fence state on any ``` line (with or without a language tag) and skip the fence line itself
+      if (line.trim().startsWith('```')) {
+        inCodeFence = !inCodeFence
+        continue
+      }
+
+      if (inCodeFence) {
+        continue
+      }
+
       const isTableRow = TABLE_ROW_REGEX.test(line)
       const isSeparatorRow = TABLE_SEPARATOR_REGEX.test(line)
 
diff --git a/src/content-linter/tests/unit/table-column-integrity-simple.ts b/src/content-linter/tests/unit/table-column-integrity-simple.ts
@@ -60,4 +60,207 @@ describe(tableColumnIntegrity.names.join(' - '), () => {
     const errors = result.markdown
     expect(errors.length).toBe(0)
   })
+
+  test('Escaped pipes (\\|) are not counted as column separators', async () => {
+    const markdown = [
+      '| Command | Description |',
+      '|---------|-------------|',
+      '| `git log --oneline \\| head` | Shows recent commits |',
+      '| `echo "hello \\| world"` | Prints text with pipe |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Escaped pipes mixed with real separators work correctly', async () => {
+    const markdown = [
+      '| Code | Output | Notes |',
+      '|------|--------|-------|',
+      '| `echo "a \\| b" \\| wc` | 1 | Pipe in string and command |',
+      '| `grep "x" file` | matches | No pipes here |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Liquid for/endfor statements are ignored in table rows', async () => {
+    const markdown = [
+      '| Item | Details |',
+      '|------|---------|',
+      '| {% for item in collection %} |',
+      '| Product A | Available |',
+      '| Product B | Sold out |',
+      '| {% endfor %} |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Mixed liquid statements (ifversion, for, endif) are ignored', async () => {
+    const markdown = [
+      '| Feature | Status | Version |',
+      '|---------|--------|---------|',
+      '| {% ifversion ghes %} |',
+      '| {% for version in site.data.versions %} |',
+      '| Basic | Active | 1.0 |',
+      '| {% endfor %} |',
+      '| {% endif %} |',
+      '| Advanced | Beta | 2.0 |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Tables inside code fences are ignored', async () => {
+    const markdown = [
+      'Here is some example markdown:',
+      '',
+      '```markdown',
+      '| Name | Age |',
+      '|------|-----|',
+      '| Alice | 25 | Extra column that would normally cause error |',
+      '| Bob |',
+      '```',
+      '',
+      'But this real table should be validated:',
+      '',
+      '| Product | Price |',
+      '|---------|-------|',
+      '| Widget | $10 |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Tables inside different code fence types are ignored', async () => {
+    const markdown = [
+      '```',
+      '| Malformed | Table |',
+      '|-----------|-------|',
+      '| Too | Many | Columns | Here |',
+      '```',
+      '',
+      '```text',
+      '| Another | Bad |',
+      '|---------|-----|',
+      '| Missing |',
+      '```',
+      '',
+      '```yaml',
+      '| YAML | Example |',
+      '|------|---------|',
+      '| key: | value | extra |',
+      '```',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('File paths with pipes are handled correctly (regression test)', async () => {
+    // This test catches the specific issue from content/actions/tutorials/build-and-test-code/python.md,
+    // where an earlier regex, /[^\\]\|/, consumed the character before each pipe and so miscounted columns
+    const markdown = [
+      '| Directory | Ubuntu | macOS |',
+      '|-----------|--------|-------|',
+      '|**Tool Cache Directory** |`/opt/hostedtoolcache/*`|`/Users/runner/hostedtoolcache/*`|',
+      '|**Python Tool Cache**|`/opt/hostedtoolcache/Python/*`|`/Users/runner/hostedtoolcache/Python/*`|',
+      '|**PyPy Tool Cache**|`/opt/hostedtoolcache/PyPy/*`|`/Users/runner/hostedtoolcache/PyPy/*`|',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Complex file paths with multiple characters before pipes', async () => {
+    // Additional test to ensure the lookbehind regex works with various characters before pipes
+    const markdown = [
+      '| Pattern | Linux Path | Windows Path |',
+      '|---------|------------|--------------|',
+      '| Cache | `/home/user/.cache/*` | `C:\\Users\\user\\AppData\\*` |',
+      '| Logs | `/var/log/app/*` | `C:\\ProgramData\\logs\\*` |',
+      '| Config | `/etc/myapp/*` | `C:\\Program Files\\MyApp\\*` |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Code fence spanning multiple lines with tables inside', async () => {
+    const markdown = [
+      'Here is some documentation:',
+      '',
+      '```markdown',
+      '# Example Document',
+      '',
+      '| Bad | Table |',
+      '|-----|-------|',
+      '| Missing | column | here | extra |',
+      '| Another | bad | row |',
+      '',
+      'More content here',
+      '```',
+      '',
+      'This real table should be validated:',
+      '',
+      '| Good | Table |',
+      '|------|-------|',
+      '| Valid | Row |',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Multiple code fences with tables between them', async () => {
+    const markdown = [
+      '```js',
+      '| Bad | JS | Table |',
+      '|-----|----|----|',
+      '| Extra | column | here | bad |',
+      '```',
+      '',
+      'Real table that should be checked:',
+      '',
+      '| Name | Status |',
+      '|------|--------|',
+      '| Test | Pass |',
+      '',
+      '```bash',
+      '| Command | Output |',
+      '|---------|--------|',
+      '| ls | file1.txt | file2.txt | extra |',
+      '```',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
+
+  test('Code fence with language identifier', async () => {
+    const markdown = [
+      '```typescript',
+      'const badTable = `',
+      '| Name | Age |',
+      '|------|-----|',
+      '| Alice | 25 | Extra |',
+      '`',
+      '```',
+      '',
+      '```yaml',
+      'table:',
+      '  - name: Bad',
+      '  - age: 30',
+      '  - extra: column',
+      '```',
+    ].join('\n')
+    const result = await runRule(tableColumnIntegrity, { strings: { markdown } })
+    const errors = result.markdown
+    expect(errors.length).toBe(0)
+  })
 })