From 59784000b645aeacc0cc4bb06daacc472220b636 Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Sat, 4 Oct 2025 15:37:46 +1000
Subject: [PATCH 1/9] fix: broken `robots:config` normalizing

Fixes #233
---
 .../server/composables/getPathRobotConfig.ts  |   4 +-
 src/runtime/server/util.ts                    |   3 +
 src/util.ts                                   |   2 +-
 test/e2e/hook-config.test.ts                  | 128 ++++++++++++
 .../server/plugins/robots.ts                  |  54 +++++
 test/fixtures/hook-config/nuxt.config.ts      |   9 +
 test/fixtures/hook-config/pages/about.vue     |   3 +
 test/fixtures/hook-config/pages/index.vue     |   3 +
 .../hook-config/server/plugins/robots.ts      |  21 ++
 test/fixtures/hook-config/tsconfig.json       |   3 +
 test/unit/normalizeGroup.test.ts              | 185 ++++++++++++++++++
 11 files changed, 412 insertions(+), 3 deletions(-)
 create mode 100644 test/e2e/hook-config.test.ts
 create mode 100644 test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
 create mode 100644 test/fixtures/hook-config/nuxt.config.ts
 create mode 100644 test/fixtures/hook-config/pages/about.vue
 create mode 100644 test/fixtures/hook-config/pages/index.vue
 create mode 100644 test/fixtures/hook-config/server/plugins/robots.ts
 create mode 100644 test/fixtures/hook-config/tsconfig.json
 create mode 100644 test/unit/normalizeGroup.test.ts

diff --git a/src/runtime/server/composables/getPathRobotConfig.ts b/src/runtime/server/composables/getPathRobotConfig.ts
index e349b33f..61c09f51 100644
--- a/src/runtime/server/composables/getPathRobotConfig.ts
+++ b/src/runtime/server/composables/getPathRobotConfig.ts
@@ -48,13 +48,13 @@ export function getPathRobotConfig(e: H3Event, options?: { userAgent?: string, s
     ...nitroApp._robots.ctx.groups.filter(g => g.userAgent.includes('*')),
   ]
   for (const group of groups) {
-    if (!group._indexable) {
+    if (group._indexable === false) {
       return {
         indexable: false,
         rule: robotsDisabledValue,
         debug: {
           source: '/robots.txt',
-          line: `Disallow: /`,
+          line: JSON.stringify(group),
         },
       }
     }
diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts
index 569d79ff..b4dbb85b 100644
--- a/src/runtime/server/util.ts
+++ b/src/runtime/server/util.ts
@@ -2,6 +2,7 @@ import type { H3Event } from 'h3'
 import type { NitroApp } from 'nitropack'
 import type { HookRobotsConfigContext } from '../types'
 import { useNitroApp } from 'nitropack/runtime'
+import { normalizeGroup } from '../../util'
 import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'

 export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
@@ -13,6 +14,8 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
     ...JSON.parse(JSON.stringify({ groups, sitemaps })),
   }
   await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
+  // Normalize groups after hook to ensure all groups have _indexable property
+  generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
   nitro._robots.ctx = generateRobotsTxtCtx
   return generateRobotsTxtCtx
 }
diff --git a/src/util.ts b/src/util.ts
index 361bcfa2..e0cab1e4 100644
--- a/src/util.ts
+++ b/src/util.ts
@@ -272,7 +272,7 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
     disallow,
     allow,
     contentUsage,
-    _indexable: !disallow.includes((rule: string) => rule === '/'),
+    _indexable: !disallow.includes('/'),
     _rules: [
       ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
       ...allow.map(r => ({ pattern: r, allow: true })),
diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
new file mode 100644
index 00000000..62c92e3f
--- /dev/null
+++ b/test/e2e/hook-config.test.ts
@@ -0,0 +1,128 @@
+import { createResolver } from '@nuxt/kit'
+import { setup } from '@nuxt/test-utils'
+import { describe, expect, it } from 'vitest'
+
+const { resolve } = createResolver(import.meta.url)
+
+process.env.NODE_ENV = 'production'
+
+describe('robots:config hook - issue #233', async () => {
+  await setup({
+    rootDir: resolve('../../.playground'),
+    build: true,
+    server: true,
+    nuxtConfig: {
+      nitro: {
+        plugins: [],
+      },
+      hooks: {
+        'nitro:config': function (nitroConfig: any) {
+          nitroConfig.plugins = nitroConfig.plugins || []
+          nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
+        },
+      },
+    },
+  })
+
+  it('generates robots.txt with groups from hook', async () => {
+    const robotsTxt = await $fetch('/robots.txt')
+    expect(robotsTxt).toContain('Disallow: /_cwa/*')
+    expect(robotsTxt).toContain('AhrefsBot')
+  })
+
+  it('should NOT block indexable pages when groups are added via hook', async () => {
+    // This test demonstrates the bug: pages that should be indexable
+    // are incorrectly marked as non-indexable because groups added via
+    // the hook are missing the _indexable property
+    const { headers: indexHeaders } = await $fetch.raw('/', {
+      headers: {
+        'User-Agent': 'Mozilla/5.0',
+      },
+    })
+
+    // BUG: This page should NOT have noindex header because:
+    // 1. The disallow rule is for /_cwa/* which doesn't match /
+    // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
+    // However, because the groups added via hook lack _indexable property,
+    // getPathRobotConfig() incorrectly treats them as non-indexable at line 51
+
+    // BUG DEMONSTRATION: Currently this page is marked as non-indexable
+    // The actual value is "noindex, nofollow" which is WRONG
+    // It should contain "index" because:
+    // - The * user-agent group has disallow: /_cwa/* which doesn't match /
+    // - The AhrefsBot group doesn't apply to Mozilla user agent
+    // This test will FAIL until the bug is fixed
+    expect(indexHeaders.get('x-robots-tag')).toContain('index')
+    expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
+  })
+
+  it('should correctly block paths matching disallow patterns', async () => {
+    // This should be blocked by the /_cwa/* rule even though page doesn't exist
+    // We test with ignoreResponseError to capture headers from 404 responses
+    const { headers } = await $fetch.raw('/_cwa/test', {
+      headers: {
+        'User-Agent': 'Mozilla/5.0',
+      },
+      ignoreResponseError: true,
+    })
+
+    expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+  })
+
+  it('should block AhrefsBot from all paths', async () => {
+    const { headers: indexHeaders } = await $fetch.raw('/', {
+      headers: {
+        'User-Agent': 'AhrefsBot',
+      },
+    })
+
+    // AhrefsBot should be blocked everywhere
+    expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+  })
+
+  // Edge case: Multiple hook calls shouldn't cause issues
+  it('should handle multiple hook calls without breaking normalization', async () => {
+    // Second request - the hook might be called again depending on caching
+    const { headers } = await $fetch.raw('/api/test', {
+      headers: {
+        'User-Agent': 'Mozilla/5.0',
+      },
+      ignoreResponseError: true,
+    })
+
+    // Should still work correctly on subsequent requests
+    expect(headers.get('x-robots-tag')).toBeDefined()
+  })
+
+  // Edge case: Empty user agent header
+  it('should handle requests with no user agent gracefully', async () => {
+    const { headers } = await $fetch.raw('/', {
+      headers: {
+        // No User-Agent header
+      },
+    })
+
+    // Should still apply rules (defaults to * user agent)
+    expect(headers.get('x-robots-tag')).toBeDefined()
+  })
+
+  // Edge case: Case sensitivity in user agent matching
+  it('should handle user agent case variations', async () => {
+    const tests = [
+      { ua: 'ahrefsbot', desc: 'lowercase' },
+      { ua: 'AHREFSBOT', desc: 'uppercase' },
+      { ua: 'AhRefsBot', desc: 'mixed case' },
+    ]
+
+    for (const { ua } of tests) {
+      const { headers } = await $fetch.raw('/', {
+        headers: {
+          'User-Agent': ua,
+        },
+      })
+
+      // User agent matching should be case-insensitive
+      expect(headers.get('x-robots-tag')).toContain('noindex')
+    }
+  })
+})
diff --git a/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
new file mode 100644
index 00000000..36407883
--- /dev/null
+++ b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
@@ -0,0 +1,54 @@
+import { defineNitroPlugin } from '#imports'
+
+export default defineNitroPlugin((nitroApp) => {
+  nitroApp.hooks.hook('robots:config', async (ctx) => {
+    // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
+    ctx.groups.push({
+      userAgent: 'EdgeCaseBot1',
+    } as any)
+
+    // Edge case 2: Add group that's already normalized (double normalization test)
+    ctx.groups.push({
+      userAgent: ['EdgeCaseBot2'],
+      disallow: ['/'],
+      allow: [],
+      _indexable: false,
+      _rules: [{ pattern: '/', allow: false }],
+    } as any)
+
+    // Edge case 3: Modify existing groups from config
+    // This tests if normalization preserves modifications
+    if (ctx.groups.length > 0) {
+      ctx.groups[0].disallow?.push('/hook-added-path')
+    }
+
+    // Edge case 4: Add group with "/" mixed with other patterns
+    ctx.groups.push({
+      userAgent: 'EdgeCaseBot3',
+      disallow: ['/admin', '/', '/api'],
+    })
+
+    // Edge case 5: Add group with non-array values (tests asArray conversion)
+    ctx.groups.push({
+      userAgent: 'EdgeCaseBot4',
+      disallow: '/single-string-disallow',
+      allow: '/single-string-allow',
+    } as any)
+
+    // Edge case 6: Add group with special characters and whitespace
+    ctx.groups.push({
+      userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'],
+      disallow: [' /path-with-spaces ', '/normal'],
+    } as any)
+
+    // Edge case 7: Completely remove groups (extreme case)
+    // Commented out because it would break robots.txt generation
+    // ctx.groups = []
+
+    // Edge case 8: Add duplicate user agents
+    ctx.groups.push({
+      userAgent: '*', // Duplicate of default
+      disallow: ['/duplicate-test'],
+    })
+  })
+})
diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
new file mode 100644
index 00000000..019abf56
--- /dev/null
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -0,0 +1,9 @@
+import NuxteRobots from '../../../src/module'
+
+export default defineNuxtConfig({
+  modules: [NuxteRobots],
+  compatibilityDate: '2024-04-03',
+  site: {
+    url: 'https://example.com',
+  },
+})
diff --git a/test/fixtures/hook-config/pages/about.vue b/test/fixtures/hook-config/pages/about.vue
new file mode 100644
index 00000000..6f769cba
--- /dev/null
+++ b/test/fixtures/hook-config/pages/about.vue
@@ -0,0 +1,3 @@
+
diff --git a/test/fixtures/hook-config/pages/index.vue b/test/fixtures/hook-config/pages/index.vue
new file mode 100644
index 00000000..77b1b733
--- /dev/null
+++ b/test/fixtures/hook-config/pages/index.vue
@@ -0,0 +1,3 @@
+
diff --git a/test/fixtures/hook-config/server/plugins/robots.ts b/test/fixtures/hook-config/server/plugins/robots.ts
new file mode 100644
index 00000000..27aebbc5
--- /dev/null
+++ b/test/fixtures/hook-config/server/plugins/robots.ts
@@ -0,0 +1,21 @@
+import { defineNitroPlugin } from '#imports'
+
+export default defineNitroPlugin((nitroApp) => {
+  // Replicate the user's code from issue #233
+  nitroApp.hooks.hook('robots:config', async (ctx) => {
+    // Add groups via the hook - these will NOT be normalized
+    ctx.groups.push({
+      userAgent: ['*'],
+      comment: ['Block all from operational endpoints'],
+      allow: [],
+      disallow: ['/_cwa/*'],
+    })
+
+    ctx.groups.push({
+      userAgent: ['AhrefsBot'],
+      comment: ['Block AI crawlers'],
+      allow: [],
+      disallow: ['/'],
+    })
+  })
+})
diff --git a/test/fixtures/hook-config/tsconfig.json b/test/fixtures/hook-config/tsconfig.json
new file mode 100644
index 00000000..be599924
--- /dev/null
+++ b/test/fixtures/hook-config/tsconfig.json
@@ -0,0 +1,3 @@
+{
+  "extends": "../../../.playground/.nuxt/tsconfig.json"
+}
diff --git a/test/unit/normalizeGroup.test.ts b/test/unit/normalizeGroup.test.ts
new file mode 100644
index 00000000..6a565435
--- /dev/null
+++ b/test/unit/normalizeGroup.test.ts
@@ -0,0 +1,185 @@
+import { describe, expect, it } from 'vitest'
+import { normalizeGroup } from '../../src/util'
+
+describe('normalizeGroup', () => {
+  it('should set _indexable to false when disallow includes "/"', () => {
+    const group = normalizeGroup({
+      userAgent: ['*'],
+      disallow: ['/'],
+    })
+
+    // BUG: This test currently FAILS
+    // The bug is at src/util.ts:275 which uses .includes() instead of .some()
+    // .includes() with a callback always returns false, so _indexable is always true
+    expect(group._indexable).toBe(false)
+  })
+
+  it('should set _indexable to true when disallow does not include "/"', () => {
+    const group = normalizeGroup({
+      userAgent: ['*'],
+      disallow: ['/_cwa/*', '/admin'],
+    })
+
+    expect(group._indexable).toBe(true)
+  })
+
+  it('should set _indexable to true when disallow is empty', () => {
+    const group = normalizeGroup({
+      userAgent: ['*'],
+      disallow: [],
+    })
+
+    expect(group._indexable).toBe(true)
+  })
+
+  it('should set _indexable to false when disallow has "/" among other patterns', () => {
+    const group = normalizeGroup({
+      userAgent: ['AhrefsBot'],
+      disallow: ['/', '/other'],
+    })
+
+    // BUG: This test currently FAILS due to the .includes() bug
+    expect(group._indexable).toBe(false)
+  })
+
+  it('should create _rules array from disallow and allow', () => {
+    const group = normalizeGroup({
+      userAgent: ['*'],
+      disallow: ['/admin', '/secret'],
+      allow: ['/secret/allowed'],
+    })
+
+    expect(group._rules).toEqual([
+      { pattern: '/admin', allow: false },
+      { pattern: '/secret', allow: false },
+      { pattern: '/secret/allowed', allow: true },
+    ])
+  })
+
+  it('should normalize userAgent to array', () => {
+    const group = normalizeGroup({
+      userAgent: 'Googlebot',
+      disallow: ['/admin'],
+    })
+
+    expect(group.userAgent).toEqual(['Googlebot'])
+  })
+
+  it('should default userAgent to ["*"] when not provided', () => {
+    const group = normalizeGroup({
+      disallow: ['/admin'],
+    })
+
+    expect(group.userAgent).toEqual(['*'])
+  })
+
+  it('should filter out empty allow rules', () => {
+    const group = normalizeGroup({
+      userAgent: ['*'],
+      disallow: ['/admin'],
+      allow: ['', '/allowed', null, undefined],
+    })
+
+    expect(group.allow).toEqual(['/allowed'])
+    expect(group._rules).toContainEqual({ pattern: '/allowed', allow: true })
+  })
+
+  // Edge case: disallow with "/" in different positions
+  it('should detect "/" at any position in disallow array', () => {
+    const group1 = normalizeGroup({ disallow: ['/', '/admin'] })
+    const group2 = normalizeGroup({ disallow: ['/admin', '/'] })
+    const group3 = normalizeGroup({ disallow: ['/admin', '/', '/secret'] })
+
+    expect(group1._indexable).toBe(false)
+    expect(group2._indexable).toBe(false)
+    expect(group3._indexable).toBe(false)
+  })
+
+  // Edge case: similar patterns to "/" that should NOT trigger _indexable: false
+  it('should only detect exact "/" match, not similar patterns', () => {
+    const group = normalizeGroup({
+      disallow: ['/api', '/*', '//', '/path/', '/ ', ' /'],
+    })
+
+    expect(group._indexable).toBe(true)
+  })
+
+  // Edge case: double normalization (should be idempotent)
+  it('should handle double normalization without breaking', () => {
+    const input = { disallow: ['/'] }
+    const once = normalizeGroup(input)
+    const twice = normalizeGroup(once as any)
+
+    expect(twice._indexable).toBe(false)
+    expect(twice.userAgent).toEqual(['*'])
+  })
+
+  // Edge case: empty disallow values mixed in
+  it('should filter out empty disallow rules from _rules but keep them for _indexable check', () => {
+    const group = normalizeGroup({
+      disallow: ['', '/admin', null, undefined, '/'],
+    })
+
+    // asArray preserves null/undefined in arrays (doesn't filter them)
+    expect(group.disallow).toEqual(['', '/admin', null, undefined, '/'])
+    expect(group._indexable).toBe(false) // Should still detect '/'
+    expect(group._rules).toEqual([
+      { pattern: '/admin', allow: false },
+      { pattern: '/', allow: false },
+    ]) // But .filter(Boolean) removes falsy values from _rules
+  })
+
+  // Edge case: non-string disallow values
+  it('should handle non-string disallow values gracefully', () => {
+    const group = normalizeGroup({
+      disallow: ['/admin', 123 as any, false as any, '/'],
+    })
+
+    expect(group._indexable).toBe(false)
+  })
+
+  // Edge case: undefined/null group properties
+  it('should handle missing optional properties', () => {
+    const group = normalizeGroup({})
+
+    expect(group.userAgent).toEqual(['*'])
+    expect(group.disallow).toEqual([])
+    expect(group.allow).toEqual([])
+    expect(group._indexable).toBe(true)
+    expect(group._rules).toEqual([])
+  })
+
+  // Edge case: contentUsage normalization
+  it('should normalize and filter contentUsage array', () => {
+    const group1 = normalizeGroup({
+      contentUsage: 'noai',
+    })
+    const group2 = normalizeGroup({
+      contentUsage: ['noai', 'noimageai', '', null, undefined],
+    })
+
+    expect(group1.contentUsage).toEqual(['noai'])
+    expect(group2.contentUsage).toEqual(['noai', 'noimageai'])
+  })
+
+  // Edge case: Yandex-specific properties
+  it('should preserve additional properties like cleanParam', () => {
+    const group = normalizeGroup({
+      disallow: ['/'],
+      cleanParam: ['param1', 'param2'],
+    } as any)
+
+    expect(group._indexable).toBe(false)
+    expect((group as any).cleanParam).toEqual(['param1', 'param2'])
+  })
+
+  // Edge case: _skipI18n property preservation
+  it('should preserve _skipI18n internal property', () => {
+    const group = normalizeGroup({
+      disallow: ['/admin'],
+      _skipI18n: true,
+    })
+
+    expect(group._skipI18n).toBe(true)
+  })
+})

From f034062e8129016df3be8707d604b86a29cc78f7 Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Sun, 5 Oct 2025 17:57:09 +1100
Subject: [PATCH 2/9] chore: clean up

---
 src/runtime/server/composables/getSiteRobotConfig.ts |  4 ++--
 test/e2e/hook-config.test.ts                          | 11 +----------
 test/unit/normalizeGroup.test.ts                      |  7 +++----
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/src/runtime/server/composables/getSiteRobotConfig.ts b/src/runtime/server/composables/getSiteRobotConfig.ts
index 7f4ccd7e..b666aa86 100644
--- a/src/runtime/server/composables/getSiteRobotConfig.ts
+++ b/src/runtime/server/composables/getSiteRobotConfig.ts
@@ -1,7 +1,7 @@
 import type { H3Event } from 'h3'
 import type { ParsedRobotsTxt } from '../../types'
+import { getSiteConfig } from '#site-config/server/composables'
 import { getSiteIndexable } from '#site-config/server/composables/getSiteIndexable'
-import { useSiteConfig } from '#site-config/server/composables/useSiteConfig'
 import { getQuery } from 'h3'
 import { useRuntimeConfigNuxtRobots } from './useRuntimeConfigNuxtRobots'
@@ -14,7 +14,7 @@ export function getSiteRobotConfig(e: H3Event): { indexable: boolean, hints: str
   // allow previewing with ?mockProductionEnv
   const queryIndexableEnabled = String(query.mockProductionEnv) === 'true' || query.mockProductionEnv === ''
   if ((debug || import.meta.dev)) {
-    const { _context } = useSiteConfig(e, { debug: debug || import.meta.dev })
+    const { _context } = getSiteConfig(e, { debug: debug || import.meta.dev })
     if (queryIndexableEnabled) {
       indexable = true
       hints.push('You are mocking a production enviroment with ?mockProductionEnv query.')
diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
index 62c92e3f..70d38b8c 100644
--- a/test/e2e/hook-config.test.ts
+++ b/test/e2e/hook-config.test.ts
@@ -40,18 +40,9 @@ describe('robots:config hook - issue #233', async () => {
       },
     })

-    // BUG: This page should NOT have noindex header because:
+    // This page should NOT have noindex header because:
     // 1. The disallow rule is for /_cwa/* which doesn't match /
     // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
-    // However, because the groups added via hook lack _indexable property,
-    // getPathRobotConfig() incorrectly treats them as non-indexable at line 51
-
-    // BUG DEMONSTRATION: Currently this page is marked as non-indexable
-    // The actual value is "noindex, nofollow" which is WRONG
-    // It should contain "index" because:
-    // - The * user-agent group has disallow: /_cwa/* which doesn't match /
-    // - The AhrefsBot group doesn't apply to Mozilla user agent
-    // This test will FAIL until the bug is fixed
     expect(indexHeaders.get('x-robots-tag')).toContain('index')
     expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
   })
diff --git a/test/unit/normalizeGroup.test.ts b/test/unit/normalizeGroup.test.ts
index 6a565435..564de77d 100644
--- a/test/unit/normalizeGroup.test.ts
+++ b/test/unit/normalizeGroup.test.ts
@@ -8,9 +8,6 @@ describe('normalizeGroup', () => {
       disallow: ['/'],
     })

-    // BUG: This test currently FAILS
-    // The bug is at src/util.ts:275 which uses .includes() instead of .some()
-    // .includes() with a callback always returns false, so _indexable is always true
     expect(group._indexable).toBe(false)
   })

@@ -38,7 +35,6 @@ describe('normalizeGroup', () => {
       disallow: ['/', '/other'],
     })

-    // BUG: This test currently FAILS due to the .includes() bug
     expect(group._indexable).toBe(false)
   })

@@ -77,6 +73,7 @@ describe('normalizeGroup', () => {
     const group = normalizeGroup({
       userAgent: ['*'],
       disallow: ['/admin'],
+      // @ts-expect-error untyped
       allow: ['', '/allowed', null, undefined],
     })

@@ -117,6 +114,7 @@ describe('normalizeGroup', () => {
   // Edge case: empty disallow values mixed in
   it('should filter out empty disallow rules from _rules but keep them for _indexable check', () => {
     const group = normalizeGroup({
+      // @ts-expect-error untyped
       disallow: ['', '/admin', null, undefined, '/'],
     })

@@ -155,6 +153,7 @@ describe('normalizeGroup', () => {
       contentUsage: 'noai',
     })
     const group2 = normalizeGroup({
+      // @ts-expect-error untyped
       contentUsage: ['noai', 'noimageai', '', null, undefined],
     })

From 8473f3ea130d09a19a30b7fc5fb8be23b7a26991 Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Sun, 5 Oct 2025 18:36:25 +1100
Subject: [PATCH 3/9] fix: don't renormalize

---
 src/runtime/server/util.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts
index b4dbb85b..5dc02a19 100644
--- a/src/runtime/server/util.ts
+++ b/src/runtime/server/util.ts
@@ -14,8 +14,6 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
     ...JSON.parse(JSON.stringify({ groups, sitemaps })),
   }
   await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
-  // Normalize groups after hook to ensure all groups have _indexable property
-  generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
   nitro._robots.ctx = generateRobotsTxtCtx
   return generateRobotsTxtCtx
 }

From c0c1b09403087a55a3e667b7cbf5206dc8360cb7 Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Sun, 5 Oct 2025 18:43:30 +1100
Subject: [PATCH 4/9] chore: renormalize

---
 src/runtime/server/util.ts |  1 +
 src/runtime/types.ts       |  1 +
 src/util.ts                | 14 +++++++++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts
index 5dc02a19..14cd927d 100644
--- a/src/runtime/server/util.ts
+++ b/src/runtime/server/util.ts
@@ -14,6 +14,7 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
     ...JSON.parse(JSON.stringify({ groups, sitemaps })),
   }
   await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
+  generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
   nitro._robots.ctx = generateRobotsTxtCtx
   return generateRobotsTxtCtx
 }
diff --git a/src/runtime/types.ts b/src/runtime/types.ts
index 5ff49565..c56c7f7c 100644
--- a/src/runtime/types.ts
+++ b/src/runtime/types.ts
@@ -79,6 +79,7 @@ export interface RobotsGroupResolved {
   // runtime optimization
   _indexable?: boolean
   _rules?: { pattern: string, allow: boolean }[]
+  _normalized?: boolean
 }

 export interface HookRobotsTxtContext {
diff --git a/src/util.ts b/src/util.ts
index e0cab1e4..77e1163f 100644
--- a/src/util.ts
+++ b/src/util.ts
@@ -262,7 +262,18 @@ export function asArray(v: any) {
   return typeof v === 'undefined' ? [] : (Array.isArray(v) ? v : [v])
 }

-export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
+export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): RobotsGroupResolved {
+  // quick renormalization check
+  if ((group as RobotsGroupResolved)._normalized) {
+    const resolvedGroup = group as RobotsGroupResolved
+    const disallow = asArray(resolvedGroup.disallow) // we can have empty disallow
+    resolvedGroup._indexable = !disallow.includes('/')
+    resolvedGroup._rules = [
+      ...resolvedGroup.disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
+      ...resolvedGroup.allow.map(r => ({ pattern: r, allow: true })),
+    ]
+    return resolvedGroup
+  }
   const disallow = asArray(group.disallow) // we can have empty disallow
   const allow = asArray(group.allow).filter(rule => Boolean(rule))
   const contentUsage = asArray(group.contentUsage).filter(rule => Boolean(rule))
@@ -277,6 +288,7 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
       ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
       ...allow.map(r => ({ pattern: r, allow: true })),
     ],
+    _normalized: true,
   }
 }

From f41467be71034cfcd9c067628c5a9f34db73d2dd Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Mon, 6 Oct 2025 01:34:20 +1100
Subject: [PATCH 5/9] feat: nitro hook `robots:init` and `robots:robots-txt:input`

---
 docs/content/3.nitro-api/2.nitro-hooks.md |  70 +++++++--
 src/module.ts                             |   2 +
 src/runtime/server/plugins/initContext.ts |  35 ++++-
 src/runtime/server/routes/robots-txt.ts   |  50 +++++--
 src/runtime/server/util.ts                |  36 ++---
 src/runtime/types.ts                      |  15 ++
 test/e2e/hook-config.test.ts              |  15 +-
 .../server/plugins/robots.ts              |  18 ++-
 .../hook-config/server/plugins/robots.ts  |  16 +-
 test/unit/normalizeRobotsContext.test.ts  | 137 ++++++++++++++
 10 files changed, 332 insertions(+), 62 deletions(-)
 create mode 100644 test/unit/normalizeRobotsContext.test.ts

diff --git a/docs/content/3.nitro-api/2.nitro-hooks.md b/docs/content/3.nitro-api/2.nitro-hooks.md
index cd2635af..27077b38 100644
--- a/docs/content/3.nitro-api/2.nitro-hooks.md
+++ b/docs/content/3.nitro-api/2.nitro-hooks.md
@@ -3,7 +3,7 @@ title: Nitro Hooks
 description: Learn how to use Nitro hooks to modify the robots final output.
 ---

-## `'robots:config'`{lang="ts"}
+## `'robots:init'`{lang="ts"}

 **Type:** `(ctx: HookContext) => void | Promise`{lang="ts"}

 ```ts
 interface HookContext {
   groups: RobotsGroupResolved[]
   sitemaps: string[]
-  context: 'robots.txt' | 'init'
-  event?: H3Event // undefined on `init`
+  errors: string[]
 }
 ```

-Modify the robots config before it's used to generate the indexing rules.
+Modify the robots config during Nitro initialization. This is called once when Nitro starts.

-This is called when Nitro starts `init` as well as when generating the robots.txt `robots.txt`.
+Use this hook when you need to fetch or compute robot rules at startup and cache them for all subsequent requests.
-```ts [server/plugins/robots-ignore-routes.ts]
+```ts [server/plugins/robots-init.ts]
 export default defineNitroPlugin((nitroApp) => {
-  nitroApp.hooks.hook('robots:config', async (ctx) => {
-    // extend the robot.txt rules at runtime
-    if (ctx.context === 'init') {
-      // probably want to cache this
-      const ignoredRoutes = await $fetch('/api/ignored-routes')
-      ctx.groups[0].disallow.push(...ignoredRoutes)
+  nitroApp.hooks.hook('robots:init', async (ctx) => {
+    // Fetch ignored routes at startup and cache them
+    const ignoredRoutes = await $fetch('/api/ignored-routes')
+    ctx.groups[0].disallow.push(...ignoredRoutes)
   })
 })
 ```
+
+## `'robots:robots-txt:input'`{lang="ts"}
+
+**Type:** `(ctx: HookContext) => void | Promise`{lang="ts"}
+
+```ts
+interface HookContext {
+  groups: RobotsGroupResolved[]
+  sitemaps: string[]
+  errors: string[]
+  event: H3Event
+}
+```
+
+Modify the robots config before generating the robots.txt file. This is called on each robots.txt request.
+
+Use this hook when you need to customize the robots.txt output based on the request context (e.g., headers).
+
+```ts [server/plugins/robots-dynamic.ts]
+export default defineNitroPlugin((nitroApp) => {
+  nitroApp.hooks.hook('robots:robots-txt:input', async (ctx) => {
+    // Dynamically adjust rules based on request
+    const isDevelopment = ctx.event.headers.get('x-development') === 'true'
+    if (isDevelopment) {
+      ctx.groups[0].disallow.push('/staging/*')
+    }
+  })
+})
+```
+
+## `'robots:config'`{lang="ts"} Deprecated
+
+**Type:** `(ctx: HookContext) => void | Promise`{lang="ts"}
+
+::callout{type="warning"}
+This hook is deprecated. Use `robots:init` for initialization or `robots:robots-txt:input` for robots.txt generation instead.
+::
+
+```ts
+interface HookContext {
+  groups: RobotsGroupResolved[]
+  sitemaps: string[]
+  context: 'robots.txt' | 'init'
+  event?: H3Event // undefined on `init`
+}
+```
+
+This hook was used for both initialization and robots.txt generation. It has been split into two separate hooks for better clarity:
+- Use `robots:init` when `context === 'init'`
+- Use `robots:robots-txt:input` when `context === 'robots.txt'`
+
 ## `'robots:robots-txt'`{lang="ts"}

 **Type:** `(ctx: HookContext) => void | Promise`{lang="ts"}
diff --git a/src/module.ts b/src/module.ts
index c67c7a60..4c6515e1 100644
--- a/src/module.ts
+++ b/src/module.ts
@@ -541,7 +541,9 @@ export default defineNuxtModule({
 }
 interface NitroRuntimeHooks {
   'robots:config': (ctx: import('${typesPath}').HookRobotsConfigContext) => void | Promise
+  'robots:init': (ctx: import('${typesPath}').HookRobotsInitContext) => void | Promise
   'robots:robots-txt': (ctx: import('${typesPath}').HookRobotsTxtContext) => void | Promise
+  'robots:robots-txt:input': (ctx: import('${typesPath}').HookRobotsTxtInputContext) => void | Promise
 }`
 return `// Generated by nuxt-robots
diff --git a/src/runtime/server/plugins/initContext.ts b/src/runtime/server/plugins/initContext.ts
index 9151af5c..0f6b733f 100644
--- a/src/runtime/server/plugins/initContext.ts
+++ b/src/runtime/server/plugins/initContext.ts
@@ -1,10 +1,11 @@
 import type { NitroApp } from 'nitropack/types'
+import type { HookRobotsConfigContext, HookRobotsInitContext } from '../../types'
 import { defineNitroPlugin, getRouteRules } from 'nitropack/runtime'
 import { withoutTrailingSlash } from 'ufo'
-import { createPatternMap } from '../../../util'
+import { createPatternMap, normalizeGroup } from '../../../util'
 import { useRuntimeConfigNuxtRobots } from '../composables/useRuntimeConfigNuxtRobots'
 import { logger } from '../logger'
-import { resolveRobotsTxtContext } from '../util'
+import { normalizeRobotsContext } from '../util'

 const PRERENDER_NO_SSR_ROUTES = new Set(['/index.html', '/200.html', '/404.html'])
@@ -19,7 +20,35 @@ export default defineNitroPlugin(async (nitroApp: NitroApp) => {
   }
   nitroApp._robots = {} as typeof nitroApp._robots
-  await resolveRobotsTxtContext(undefined, nitroApp)
+
+  // Get and normalize base context from runtime config
+  const { groups, sitemap: sitemaps } = useRuntimeConfigNuxtRobots()
+  const baseCtx = normalizeRobotsContext({
+    groups: JSON.parse(JSON.stringify(groups)),
+    sitemaps: JSON.parse(JSON.stringify(sitemaps)),
+  })
+
+  // Call robots:init hook
+  const initCtx: HookRobotsInitContext = {
+    ...baseCtx,
+  }
+  await nitroApp.hooks.callHook('robots:init', initCtx)
+
+  // Backwards compatibility: also call deprecated robots:config hook
+  const deprecatedCtx: HookRobotsConfigContext = {
+    ...initCtx,
+    event: undefined,
+    context: 'init',
+  }
+  await nitroApp.hooks.callHook('robots:config', deprecatedCtx)
+
+  // Sync changes back and re-normalize
+  initCtx.groups = deprecatedCtx.groups.map(normalizeGroup)
+  initCtx.sitemaps = deprecatedCtx.sitemaps
+  initCtx.errors = deprecatedCtx.errors
+
+  // Store in nitro app
+  nitroApp._robots.ctx = { ...initCtx, context: 'init', event: undefined }
   const nuxtContentUrls = new Set()
   if (isNuxtContentV2) {
     let urls: string[] | undefined
diff --git a/src/runtime/server/routes/robots-txt.ts b/src/runtime/server/routes/robots-txt.ts
index fc2fbc08..eb7874d7 100644
--- a/src/runtime/server/routes/robots-txt.ts
+++ b/src/runtime/server/routes/robots-txt.ts
@@ -1,12 +1,12 @@
-import type { HookRobotsConfigContext, HookRobotsTxtContext } from '../../types'
+import type { HookRobotsConfigContext, HookRobotsTxtContext, HookRobotsTxtInputContext } from '../../types'
 import { logger } from '#robots/server/logger'
 import { withSiteUrl } from '#site-config/server/composables/utils'
 import { defineEventHandler, setHeader } from 'h3'
 import { useNitroApp } from 'nitropack/runtime'
-import { asArray, generateRobotsTxt } from '../../util'
+import { generateRobotsTxt, normalizeGroup } from '../../util'
 import { getSiteRobotConfig } from '../composables/getSiteRobotConfig'
 import { useRuntimeConfigNuxtRobots } from '../composables/useRuntimeConfigNuxtRobots'
-import { resolveRobotsTxtContext } from '../util'
+import { normalizeRobotsContext } from '../util'

 export default defineEventHandler(async (e) => {
   const nitroApp = useNitroApp()
@@ -26,13 +26,43 @@ export default defineEventHandler(async (e) => {
     ],
   }
   if (indexable) {
-    robotsTxtCtx = await resolveRobotsTxtContext(e)
-    // normalise
-    robotsTxtCtx.sitemaps = [...new Set(
-      asArray(robotsTxtCtx.sitemaps)
-        // validate sitemaps are absolute
-        .map(s => !s.startsWith('http') ? withSiteUrl(e, s, { withBase: true, absolute: true }) : s),
-    )]
+    // Start from cached init context (includes robots:init modifications)
+    // or get fresh from runtime config and normalize
+    const { groups, sitemap: sitemaps } = useRuntimeConfigNuxtRobots(e)
+    const baseCtx = nitroApp._robots.ctx
+      ? {
+          groups: nitroApp._robots.ctx.groups,
+          sitemaps: JSON.parse(JSON.stringify(nitroApp._robots.ctx.sitemaps)),
+          errors: [],
+        }
+      : normalizeRobotsContext({ groups, sitemaps, errors: [] })
+
+    // Call robots:robots-txt:input hook
+    const inputCtx: HookRobotsTxtInputContext = {
+      ...baseCtx,
+      event: e,
+    }
+    await nitroApp.hooks.callHook('robots:robots-txt:input', inputCtx)
+
+    // Backwards compatibility: also call deprecated robots:config hook
+    const deprecatedCtx: HookRobotsConfigContext = {
+      ...inputCtx,
+      context: 'robots.txt',
+    }
+    await nitroApp.hooks.callHook('robots:config', deprecatedCtx)
+
+    // Sync changes back and re-normalize
+    inputCtx.groups = deprecatedCtx.groups.map(normalizeGroup)
+    inputCtx.sitemaps = deprecatedCtx.sitemaps
+    inputCtx.errors = deprecatedCtx.errors
+
+    // Update nitro._robots.ctx so getPathRobotConfig can access the latest groups
+    nitroApp._robots.ctx = { ...inputCtx, context: 'robots.txt', event: e }
+
+    robotsTxtCtx = inputCtx
+    // Make sitemaps absolute (already normalized and deduplicated in normalizeRobotsContext)
+    robotsTxtCtx.sitemaps = robotsTxtCtx.sitemaps
+      .map(s => !s.startsWith('http') ? withSiteUrl(e, s, { withBase: true, absolute: true }) : s)
     if (isNuxtContentV2) {
       const contentWithRobotRules = await e.$fetch('/__robots__/nuxt-content.json', {
         headers: {
diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts
index 14cd927d..e6d8fc5f 100644
--- a/src/runtime/server/util.ts
+++ b/src/runtime/server/util.ts
@@ -1,20 +1,22 @@
-import type { H3Event } from 'h3'
-import type { NitroApp } from 'nitropack'
-import type { HookRobotsConfigContext } from '../types'
-import { useNitroApp } from 'nitropack/runtime'
-import { normalizeGroup } from '../../util'
-import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'
+import type { ParsedRobotsTxt, RobotsGroupInput } from '../types'
+import { asArray, normalizeGroup } from '../../util'

-export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
-  const { groups, sitemap: sitemaps } = useRuntimeConfigNuxtRobots(e)
-  // make the config writable
-  const generateRobotsTxtCtx: HookRobotsConfigContext = {
-    event: e,
-    context: e ? 'robots.txt' : 'init',
-    ...JSON.parse(JSON.stringify({ groups, sitemaps })),
+/**
+ * Pure normalization function for robots context
+ * - Groups are normalized with _indexable and _rules
+ * - Sitemaps are converted to array, deduplicated, and filtered for valid strings
+ * - Errors are converted to array and filtered for valid strings
+ *
+ * Note: URL absolutization (withSiteUrl) happens separately in robots-txt.ts since it requires H3Event
+ */
+export function normalizeRobotsContext(input: Partial): ParsedRobotsTxt {
+  return {
+    groups: asArray(input.groups).map(g => normalizeGroup(g as RobotsGroupInput)),
+    sitemaps: [...new Set(
+      asArray(input.sitemaps)
+        .filter(s => typeof s === 'string' && s.trim().length > 0),
+    )],
+    errors: asArray(input.errors)
+      .filter(e => typeof e === 'string' && e.trim().length > 0),
   }
-  await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
-  generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
-  nitro._robots.ctx = generateRobotsTxtCtx
-  return generateRobotsTxtCtx
 }
diff --git a/src/runtime/types.ts b/src/runtime/types.ts
index c56c7f7c..c9fe01d1 100644
--- a/src/runtime/types.ts
+++ b/src/runtime/types.ts
@@ -92,6 +92,21 @@ export interface HookRobotsConfigContext extends ParsedRobotsTxt {
   context: 'robots.txt' | 'init'
 }

+/**
+ * Hook context for robots:init
+ * Called once during Nitro initialization
+ */
+export interface HookRobotsInitContext extends ParsedRobotsTxt {
+}
+
+/**
+ * Hook context for robots:robots-txt:input
+ * Called on each robots.txt request
+ */
+export interface HookRobotsTxtInputContext extends ParsedRobotsTxt {
+  event: H3Event
+}
+
 // Bot Detection Types
 export interface BotDetectionContext {
   isBot: boolean
diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
index 70d38b8c..a4f05efb 100644
--- a/test/e2e/hook-config.test.ts
+++ b/test/e2e/hook-config.test.ts
@@ -6,7 +6,7 @@ const { resolve } = createResolver(import.meta.url)

 process.env.NODE_ENV = 'production'

-describe('robots:config hook - issue #233', async () => {
+describe('hook system (robots:robots-txt:input)', async () => {
   await setup({
     rootDir: resolve('../../.playground'),
     build: true,
     server: true,
@@ -24,16 +24,21 @@ describe('hook system (robots:robots-txt:input)', async () => {
     },
   })

-  it('generates robots.txt with groups from hook', async () => {
+  it('robots:robots-txt:input hook is called and can add groups', async () => {
     const robotsTxt = await $fetch('/robots.txt')
+    // Should include groups added via robots:robots-txt:input hook
     expect(robotsTxt).toContain('Disallow: /_cwa/*')
     expect(robotsTxt).toContain('AhrefsBot')
   })

+  it('robots:robots-txt:input hook receives normalized groups', async () => {
+    // Groups should be normalized with _indexable property
+    // Pages that don't match disallow patterns should be indexable
+    const { headers } = await $fetch.raw('/')
+    expect(headers.get('x-robots-tag')).toContain('index')
+  })
+
   it('should NOT block indexable pages when groups are added via hook', async () => {
-    // This test demonstrates the bug: pages that should be indexable
-    // are incorrectly marked as non-indexable because groups added via
-    // the hook are missing the _indexable property
     const { headers: indexHeaders } = await $fetch.raw('/', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
diff --git a/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
index 36407883..bf9feb57 100644
--- a/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
+++ b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
@@ -1,7 +1,9 @@
-import { defineNitroPlugin } from '#imports'
+import type { NitroApp } from 'nitropack'
+import type { HookRobotsTxtInputContext } from '../../../../../src/runtime/types'
+import { defineNitroPlugin } from 'nitropack/runtime'

-export default defineNitroPlugin((nitroApp) => {
-  nitroApp.hooks.hook('robots:config', async (ctx) => {
+export default defineNitroPlugin((nitroApp: NitroApp) => {
+  nitroApp.hooks.hook('robots:robots-txt:input', async (ctx: HookRobotsTxtInputContext) => {
     // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
     ctx.groups.push({
       userAgent: 'EdgeCaseBot1',
@@ -18,15 +20,15 @@ export default defineNitroPlugin((nitroApp) => {

     // Edge case 3: Modify existing groups from config
     // This tests if normalization preserves modifications
-    if (ctx.groups.length > 0) {
+    if (ctx.groups.length > 0 && ctx.groups[0]) {
       ctx.groups[0].disallow?.push('/hook-added-path')
     }

     // Edge case 4: Add group with "/" mixed with other patterns
     ctx.groups.push({
-      userAgent: 'EdgeCaseBot3',
+      userAgent: ['EdgeCaseBot3'],
       disallow: ['/admin', '/', '/api'],
-    })
+    } as any)

     // Edge case 5: Add group with non-array values (tests asArray conversion)
     ctx.groups.push({
@@ -47,8 +49,8 @@ export default defineNitroPlugin((nitroApp) => {

     // Edge case 8: Add duplicate user agents
     ctx.groups.push({
-      userAgent: '*', // Duplicate of default
+      userAgent: ['*'], // Duplicate of default
       disallow: ['/duplicate-test'],
-    })
+    } as any)
   })
 })
diff --git a/test/fixtures/hook-config/server/plugins/robots.ts b/test/fixtures/hook-config/server/plugins/robots.ts
index 27aebbc5..a3abb6c5 100644
--- a/test/fixtures/hook-config/server/plugins/robots.ts
+++ b/test/fixtures/hook-config/server/plugins/robots.ts
@@ -1,21 +1,23 @@
-import { defineNitroPlugin } from '#imports'
+import type { NitroApp } from 'nitropack'
+import type { HookRobotsTxtInputContext } from '../../../../../src/runtime/types'
+import { defineNitroPlugin } from 'nitropack/runtime'

-export default defineNitroPlugin((nitroApp) => {
-  // Replicate the user's code from issue #233
-  nitroApp.hooks.hook('robots:config', async (ctx) => {
-    // Add groups via the hook - these will NOT be normalized
+export default defineNitroPlugin((nitroApp: NitroApp) => {
+  // Test the new robots:robots-txt:input hook
+  nitroApp.hooks.hook('robots:robots-txt:input', async (ctx: HookRobotsTxtInputContext) => {
+    // Add groups via the hook
     ctx.groups.push({
       userAgent: ['*'],
       comment: ['Block all from operational endpoints'],
       allow: [],
       disallow: ['/_cwa/*'],
-    })
+    } as any)

     ctx.groups.push({
       userAgent: ['AhrefsBot'],
       comment: ['Block AI crawlers'],
       allow: [],
       disallow: ['/'],
-    })
+    } as any)
   })
 })
diff --git a/test/unit/normalizeRobotsContext.test.ts b/test/unit/normalizeRobotsContext.test.ts
new file mode 100644
index 00000000..024f5943
--- /dev/null
+++ b/test/unit/normalizeRobotsContext.test.ts
@@ -0,0 +1,137 @@
+import { describe, expect, it } from 'vitest'
+import { normalizeRobotsContext } from '../../src/runtime/server/util'
+
+describe('normalizeRobotsContext', () => {
+  it('should normalize empty input', () => {
+    const result = normalizeRobotsContext({})
+
+    expect(result.groups).toEqual([])
+    expect(result.sitemaps).toEqual([])
+    expect(result.errors).toEqual([])
+  })
+
+  it('should normalize groups with _indexable and _rules', () => {
+    const result = normalizeRobotsContext({
+      groups: [
+        {
+          userAgent: ['*'],
+          disallow: ['/'],
+        } as any,
+      ],
+    })
+
+    expect(result.groups).toHaveLength(1)
+    expect(result.groups[0]).toHaveProperty('_indexable', false)
+    expect(result.groups[0]).toHaveProperty('_rules')
+  })
+
+  it('should convert string sitemap to array', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: '/sitemap.xml' as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml'])
+  })
+
+  it('should keep array sitemaps as array', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['/sitemap.xml', '/sitemap2.xml'],
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should filter out falsy sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['', '/sitemap.xml', null, undefined, '/sitemap2.xml'] as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should filter out whitespace-only sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: [' ', '/sitemap.xml', '\t', '/sitemap2.xml'] as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should deduplicate sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['/sitemap.xml', '/sitemap2.xml', '/sitemap.xml', '/sitemap2.xml'],
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should filter out non-string sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['/sitemap.xml', 123, { url: '/sitemap' }, '/sitemap2.xml'] as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should convert string errors to array', () => {
+    const result = normalizeRobotsContext({
+      errors: 'Something went wrong' as any,
+    })
+
+    expect(result.errors).toEqual(['Something went wrong'])
+  })
+
+  it('should filter out falsy errors', () => {
+    const result = normalizeRobotsContext({
+      errors: ['', 'Error 1', null, undefined, 'Error 2'] as any,
+    })
+
+    expect(result.errors).toEqual(['Error 1', 'Error 2'])
+  })
+
+  it('should filter out whitespace-only errors', () => {
+    const result = normalizeRobotsContext({
+      errors: [' ', 'Error 1', '\n\t', 'Error 2'] as any,
+    })
+
+    expect(result.errors).toEqual(['Error 1', 'Error 2'])
+  })
+
+  it('should filter out non-string errors', () => {
+    const result = normalizeRobotsContext({
+      errors: ['Error 1', 123, { message: 'error' }, 'Error 2'] as any,
+    })
+
+    expect(result.errors).toEqual(['Error 1', 'Error 2'])
+  })
+
+  it('should handle undefined values gracefully', () => {
+    const result = normalizeRobotsContext({
+      groups: undefined,
+      sitemaps: undefined,
+      errors: undefined,
+    })
+
+    expect(result.groups).toEqual([])
+    expect(result.sitemaps).toEqual([])
+    expect(result.errors).toEqual([])
+  })
+
+  it('should normalize complete input', () => {
+    const result = normalizeRobotsContext({
+      groups: [
+        {
+          userAgent: ['Googlebot'],
+          disallow: ['/admin'],
+        } as any,
+      ],
+      sitemaps: ['/sitemap.xml'],
+      errors: ['Warning: something'],
+    })
+
+    expect(result.groups).toHaveLength(1)
+    expect(result.groups[0]?._indexable).toBe(true)
+    expect(result.sitemaps).toEqual(['/sitemap.xml'])
+    expect(result.errors).toEqual(['Warning: something'])
+  })
+})

From 08d63326943e5eedcea4166bc04286c3dbaef00b Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Thu, 9 Oct 2025 12:08:57 +1100
Subject: [PATCH 6/9] chore: maybe fix tests

---
 test/e2e/hook-config.test.ts             | 47 +++++++++----------
 test/fixtures/hook-config/nuxt.config.ts |  3 ++
 2 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
index 13b5d179..0a6631e6 100644
--- a/test/e2e/hook-config.test.ts
+++ b/test/e2e/hook-config.test.ts
@@ -1,5 +1,5 @@
 import { createResolver } from '@nuxt/kit'
-import { setup } from '@nuxt/test-utils'
+import { $fetch, fetch, setup } from '@nuxt/test-utils'
 import { describe, expect, it } from 'vitest'

 const { resolve } = createResolver(import.meta.url)
@@ -8,20 +8,9 @@ process.env.NODE_ENV = 'production'

 describe('hook system (robots:robots-txt:input)', async () => {
   await setup({
-    rootDir: resolve('../../.playground'),
+    rootDir: resolve('../fixtures/hook-config'),
     build: true,
     server: true,
-    nuxtConfig: {
-      nitro: {
-        plugins: [],
-      },
-      hooks: {
-        'nitro:config': function (nitroConfig: any) {
-          nitroConfig.plugins = nitroConfig.plugins || []
-          nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
-        },
-      },
-    },
   })

   it('robots:robots-txt:input hook is called and can add groups', async () => {
@@ -34,15 +23,15 @@ describe('hook system (robots:robots-txt:input)', async () => {
   it('robots:robots-txt:input hook receives normalized groups', async () => {
     // Groups should be normalized with _indexable property
     // Pages that don't match disallow patterns should be indexable
-    const { headers } = await $fetch.raw('/')
-    expect(headers.get('x-robots-tag')).toContain('index')
+    const response = await fetch('/')
+    expect(response.headers.get('x-robots-tag')).toContain('index')
   })

   it('should NOT block indexable pages when groups are added via hook', async () => {
     // This test demonstrates the bug: pages that should be indexable
     // are incorrectly marked as non-indexable because groups added via
     // the hook are missing the _indexable property
-    const { headers: indexHeaders } = await $fetch.raw('/', {
+    const indexResponse = await fetch('/', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
       },
@@ -51,58 +40,56 @@ describe('hook system (robots:robots-txt:input)', async () => {
     // This page should NOT have noindex header because:
     // 1. The disallow rule is for /_cwa/* which doesn't match /
     // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
-    expect(indexHeaders.get('x-robots-tag')).toContain('index')
-    expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
+    expect(indexResponse.headers.get('x-robots-tag')).toContain('index')
+    expect(indexResponse.headers.get('x-robots-tag')).not.toContain('noindex')
   })

   it('should correctly block paths matching disallow patterns', async () => {
     // This should be blocked by the /_cwa/* rule even though page doesn't exist
     // We test with ignoreResponseError to capture headers from 404 responses
-    const { headers } = await $fetch.raw('/_cwa/test', {
+    const response = await fetch('/_cwa/test', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
       },
-      ignoreResponseError: true,
     })

-    expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+    expect(response.headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
   })

   it('should block AhrefsBot from all paths', async () => {
-    const { headers: indexHeaders } = await $fetch.raw('/', {
+    const indexResponse = await fetch('/', {
       headers: {
         'User-Agent': 'AhrefsBot',
       },
     })

     // AhrefsBot should be blocked everywhere
-    expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+    expect(indexResponse.headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
   })

   // Edge case: Multiple hook calls shouldn't cause issues
   it('should handle multiple hook calls without breaking normalization', async () => {
     // Second request - the hook might be called again depending on caching
-    const { headers } = await $fetch.raw('/api/test', {
+    const response = await fetch('/api/test', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
       },
-      ignoreResponseError: true,
     })

     // Should still work correctly on subsequent requests
-    expect(headers.get('x-robots-tag')).toBeDefined()
+    expect(response.headers.get('x-robots-tag')).toBeDefined()
   })

   // Edge case: Empty user agent header
   it('should handle requests with no user agent gracefully', async () => {
-    const { headers } = await $fetch.raw('/', {
+    const response = await fetch('/', {
       headers: {
         // No User-Agent header
       },
     })

     // Should still apply rules (defaults to * user agent)
-    expect(headers.get('x-robots-tag')).toBeDefined()
+    expect(response.headers.get('x-robots-tag')).toBeDefined()
   })

   // Edge case: Case sensitivity in user agent matching
@@ -114,14 +101,14 @@ describe('hook system (robots:robots-txt:input)', async () => {
     ]

     for (const { ua } of tests) {
-      const { headers } = await $fetch.raw('/', {
+      const response = await fetch('/', {
         headers: {
           'User-Agent': ua,
         },
       })

       // User agent matching should be case-insensitive
-      expect(headers.get('x-robots-tag')).toContain('noindex')
+      expect(response.headers.get('x-robots-tag')).toContain('noindex')
     }
   })
 })
diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
index 019abf56..9b19899c 100644
--- a/test/fixtures/hook-config/nuxt.config.ts
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -6,4 +6,7 @@ export default defineNuxtConfig({
   site: {
     url: 'https://example.com',
   },
+  nitro: {
+    plugins: ['plugins/robots.ts'],
+  },
 })

From 5c0705e8cde4200b8d32bc1f3c648bb4aecb0e55 Mon Sep 17 00:00:00 2001
From: silverbackdan
Date: Thu, 9 Oct 2025 19:25:11 +0100
Subject: [PATCH 7/9] Attempt to fix test in CI by using tilde to define
 current fixture as app root.

---
 test/fixtures/hook-config/nuxt.config.ts       | 2 +-
 test/fixtures/hook-config/server/tsconfig.json | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 test/fixtures/hook-config/server/tsconfig.json

diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
index 9b19899c..b63bd832 100644
--- a/test/fixtures/hook-config/nuxt.config.ts
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -7,6 +7,6 @@ export default defineNuxtConfig({
     url: 'https://example.com',
   },
   nitro: {
-    plugins: ['plugins/robots.ts'],
+    plugins: ['~/server/plugins/robots.ts'],
   },
 })
diff --git a/test/fixtures/hook-config/server/tsconfig.json b/test/fixtures/hook-config/server/tsconfig.json
new file mode 100644
index 00000000..076533d2
--- /dev/null
+++ b/test/fixtures/hook-config/server/tsconfig.json
@@ -0,0 +1,3 @@
+{
+  "extends": "../../../../.playground/.nuxt/tsconfig.server.json"
+}

From 6dc55d027b648ecb3802a0b0a19b18b89156d0da Mon Sep 17 00:00:00 2001
From: silverbackdan
Date: Thu, 9 Oct 2025 19:31:11 +0100
Subject: [PATCH 8/9] Attempt plugin resolution in test from resolver for CI
 test fix?

---
 test/fixtures/hook-config/nuxt.config.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
index b63bd832..5da8e1c6 100644
--- a/test/fixtures/hook-config/nuxt.config.ts
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -1,12 +1,15 @@
-import NuxteRobots from '../../../src/module'
+import { createResolver } from '@nuxt/kit'
+import NuxtRobots from '../../../src/module'
+
+const { resolve } = createResolver(import.meta.url)

 export default defineNuxtConfig({
-  modules: [NuxteRobots],
+  modules: [NuxtRobots],
   compatibilityDate: '2024-04-03',
   site: {
     url: 'https://example.com',
   },
   nitro: {
-    plugins: ['~/server/plugins/robots.ts'],
+    plugins: [resolve('./server/plugins/robots.ts')],
   },
 })

From d7b495e59958bca4f06328f3e22ad6895c0624a6 Mon Sep 17 00:00:00 2001
From: silverbackdan
Date: Thu, 9 Oct 2025 19:53:38 +0100
Subject: [PATCH 9/9] Test if defining nuxtConfig in setup options makes a
 difference

---
 test/e2e/hook-config.test.ts             | 5 +++++
 test/fixtures/hook-config/nuxt.config.ts | 5 +----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
index 0a6631e6..19b8f541 100644
--- a/test/e2e/hook-config.test.ts
+++ b/test/e2e/hook-config.test.ts
@@ -11,6 +11,11 @@ describe('hook system (robots:robots-txt:input)', async () => {
     rootDir: resolve('../fixtures/hook-config'),
     build: true,
     server: true,
+    nuxtConfig: {
+      nitro: {
+        plugins: ['plugins/robots.ts'],
+      },
+    },
   })

   it('robots:robots-txt:input hook is called and can add groups', async () => {
diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
index 5da8e1c6..5bc22690 100644
--- a/test/fixtures/hook-config/nuxt.config.ts
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -1,8 +1,5 @@
-import { createResolver } from '@nuxt/kit'
 import NuxtRobots from '../../../src/module'

-const { resolve } = createResolver(import.meta.url)
-
 export default defineNuxtConfig({
   modules: [NuxtRobots],
   compatibilityDate: '2024-04-03',
@@ -10,6 +7,6 @@ export default defineNuxtConfig({
   },
   nitro: {
-    plugins: [resolve('./server/plugins/robots.ts')],
+    plugins: ['plugins/robots.ts'],
   },
 })
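
Taken together, the series replaces the dual-purpose `robots:config` hook with `robots:init` (called once at Nitro startup) and `robots:robots-txt:input` (called per robots.txt request), re-normalizing groups after the hooks run. A minimal downstream consumer of the two new hooks might look like the sketch below; the hook names and context shapes come from PATCH 5, while the `/api/blocked-paths` endpoint and the `x-internal` header are illustrative assumptions, and the `as any` casts mirror the untyped group inputs used in the test fixtures above.

```ts [server/plugins/robots-hooks.ts]
export default defineNitroPlugin((nitroApp) => {
  // robots:init runs once at startup: good for rules that can be cached.
  nitroApp.hooks.hook('robots:init', async (ctx) => {
    // Hypothetical endpoint assumed to return string[] of paths to block
    const blocked = await $fetch<string[]>('/api/blocked-paths')
    ctx.groups.push({ userAgent: ['*'], disallow: blocked } as any)
  })

  // robots:robots-txt:input runs on each /robots.txt request and sees the H3 event.
  nitroApp.hooks.hook('robots:robots-txt:input', (ctx) => {
    // Hypothetical header used to serve a stricter robots.txt internally
    if (ctx.event.headers.get('x-internal') === 'true')
      ctx.groups.push({ userAgent: ['*'], disallow: ['/'] } as any)
  })
})
```

Groups pushed in either hook can be plain inputs: per PATCH 1 and PATCH 4, `normalizeGroup` runs after the hooks and fills in `_indexable` and `_rules`, so a `disallow: ['/']` added here is correctly detected as non-indexable.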