diff --git a/docs/content/3.nitro-api/2.nitro-hooks.md b/docs/content/3.nitro-api/2.nitro-hooks.md
index cd2635af..27077b38 100644
--- a/docs/content/3.nitro-api/2.nitro-hooks.md
+++ b/docs/content/3.nitro-api/2.nitro-hooks.md
@@ -3,7 +3,7 @@ title: Nitro Hooks
 description: Learn how to use Nitro hooks to modify the robots final output.
 ---
 
-## `'robots:config'`{lang="ts"}
+## `'robots:init'`{lang="ts"}
 
 **Type:** `(ctx: HookContext) => void | Promise<void>`{lang="ts"}
 
@@ -11,28 +11,74 @@ description: Learn how to use Nitro hooks to modify the robots final output.
 interface HookContext {
   groups: RobotsGroupResolved[]
   sitemaps: string[]
-  context: 'robots.txt' | 'init'
-  event?: H3Event // undefined on `init`
+  errors: string[]
 }
 ```
 
-Modify the robots config before it's used to generate the indexing rules.
+Modify the robots config during Nitro initialization. This is called once when Nitro starts.
 
-This is called when Nitro starts `init` as well as when generating the robots.txt `robots.txt`.
+Use this hook when you need to fetch or compute robot rules at startup and cache them for all subsequent requests.
 
-```ts [server/plugins/robots-ignore-routes.ts]
+```ts [server/plugins/robots-init.ts]
 export default defineNitroPlugin((nitroApp) => {
-  nitroApp.hooks.hook('robots:config', async (ctx) => {
-    // extend the robot.txt rules at runtime
-    if (ctx.context === 'init') {
-      // probably want to cache this
-      const ignoredRoutes = await $fetch('/api/ignored-routes')
-      ctx.groups[0].disallow.push(...ignoredRoutes)
+  nitroApp.hooks.hook('robots:init', async (ctx) => {
+    // Fetch ignored routes at startup and cache them
+    const ignoredRoutes = await $fetch('/api/ignored-routes')
+    ctx.groups[0].disallow.push(...ignoredRoutes)
+  })
+})
+```
+
+## `'robots:robots-txt:input'`{lang="ts"}
+
+**Type:** `(ctx: HookContext) => void | Promise<void>`{lang="ts"}
+
+```ts
+interface HookContext {
+  groups: RobotsGroupResolved[]
+  sitemaps: string[]
+  errors: string[]
+  event: H3Event
+}
+```
+
+Modify the robots config before generating the robots.txt file. This is called on each robots.txt request.
+
+Use this hook when you need to customize the robots.txt output based on the request context (e.g., headers).
+
+```ts [server/plugins/robots-dynamic.ts]
+export default defineNitroPlugin((nitroApp) => {
+  nitroApp.hooks.hook('robots:robots-txt:input', async (ctx) => {
+    // Dynamically adjust rules based on request
+    const isDevelopment = ctx.event.headers.get('x-development') === 'true'
+    if (isDevelopment) {
+      ctx.groups[0].disallow.push('/staging/*')
     }
   })
 })
 ```
 
+## `'robots:config'`{lang="ts"} Deprecated
+
+**Type:** `(ctx: HookContext) => void | Promise<void>`{lang="ts"}
+
+::callout{type="warning"}
+This hook is deprecated. Use `robots:init` for initialization or `robots:robots-txt:input` for robots.txt generation instead.
+::
+
+```ts
+interface HookContext {
+  groups: RobotsGroupResolved[]
+  sitemaps: string[]
+  context: 'robots.txt' | 'init'
+  event?: H3Event // undefined on `init`
+}
+```
+
+This hook was used for both initialization and robots.txt generation. It has been split into two separate hooks for better clarity:
+- Use `robots:init` when `context === 'init'`
+- Use `robots:robots-txt:input` when `context === 'robots.txt'`
+
 ## `'robots:robots-txt'`{lang="ts"}
 
 **Type:** `(ctx: HookContext) => void | Promise<void>`{lang="ts"}
diff --git a/src/module.ts b/src/module.ts
index c67c7a60..4c6515e1 100644
--- a/src/module.ts
+++ b/src/module.ts
@@ -541,7 +541,9 @@ export default defineNuxtModule({
   }
   interface NitroRuntimeHooks {
     'robots:config': (ctx: import('${typesPath}').HookRobotsConfigContext) => void | Promise<void>
+    'robots:init': (ctx: import('${typesPath}').HookRobotsInitContext) => void | Promise<void>
     'robots:robots-txt': (ctx: import('${typesPath}').HookRobotsTxtContext) => void | Promise<void>
+    'robots:robots-txt:input': (ctx: import('${typesPath}').HookRobotsTxtInputContext) => void | Promise<void>
   }`
 
     return `// Generated by nuxt-robots
diff --git a/src/runtime/server/plugins/initContext.ts b/src/runtime/server/plugins/initContext.ts
index 9151af5c..0f6b733f 100644
--- a/src/runtime/server/plugins/initContext.ts
+++ b/src/runtime/server/plugins/initContext.ts
@@ -1,10 +1,11 @@
 import type { NitroApp } from 'nitropack/types'
+import type { HookRobotsConfigContext, HookRobotsInitContext } from '../../types'
 import { defineNitroPlugin, getRouteRules } from 'nitropack/runtime'
 import { withoutTrailingSlash } from 'ufo'
-import { createPatternMap } from '../../../util'
+import { createPatternMap, normalizeGroup } from '../../../util'
 import { useRuntimeConfigNuxtRobots } from '../composables/useRuntimeConfigNuxtRobots'
 import { logger } from '../logger'
-import { resolveRobotsTxtContext } from '../util'
+import { normalizeRobotsContext } from '../util'
 
 const PRERENDER_NO_SSR_ROUTES = new Set(['/index.html', '/200.html', '/404.html'])
 
@@ -19,7 +20,35 @@ export default defineNitroPlugin(async (nitroApp: NitroApp) => {
   }
   nitroApp._robots = {} as typeof nitroApp._robots
-  await resolveRobotsTxtContext(undefined, nitroApp)
+
+  // Get and normalize base context from runtime config
+  const { groups, sitemap: sitemaps } = useRuntimeConfigNuxtRobots()
+  const baseCtx = normalizeRobotsContext({
+    groups: JSON.parse(JSON.stringify(groups)),
+    sitemaps: JSON.parse(JSON.stringify(sitemaps)),
+  })
+
+  // Call robots:init hook
+  const initCtx: HookRobotsInitContext = {
+    ...baseCtx,
+  }
+  await nitroApp.hooks.callHook('robots:init', initCtx)
+
+  // Backwards compatibility: also call deprecated robots:config hook
+  const deprecatedCtx: HookRobotsConfigContext = {
+    ...initCtx,
+    event: undefined,
+    context: 'init',
+  }
+  await nitroApp.hooks.callHook('robots:config', deprecatedCtx)
+
+  // Sync changes back and re-normalize
+  initCtx.groups = deprecatedCtx.groups.map(normalizeGroup)
+  initCtx.sitemaps = deprecatedCtx.sitemaps
+  initCtx.errors = deprecatedCtx.errors
+
+  // Store in nitro app
+  nitroApp._robots.ctx = { ...initCtx, context: 'init', event: undefined }
 
   const nuxtContentUrls = new Set<string>()
   if (isNuxtContentV2) {
     let urls: string[] | undefined
diff --git a/src/runtime/server/routes/robots-txt.ts b/src/runtime/server/routes/robots-txt.ts
index fc2fbc08..eb7874d7 100644
--- a/src/runtime/server/routes/robots-txt.ts
+++ b/src/runtime/server/routes/robots-txt.ts
@@ -1,12 +1,12 @@
-import type { HookRobotsConfigContext, HookRobotsTxtContext } from '../../types'
+import type { HookRobotsConfigContext, HookRobotsTxtContext, HookRobotsTxtInputContext } from '../../types'
 import { logger } from '#robots/server/logger'
 import { withSiteUrl } from '#site-config/server/composables/utils'
 import { defineEventHandler, setHeader } from 'h3'
 import { useNitroApp } from 'nitropack/runtime'
-import { asArray, generateRobotsTxt } from '../../util'
+import { generateRobotsTxt, normalizeGroup } from '../../util'
 import { getSiteRobotConfig } from '../composables/getSiteRobotConfig'
 import { useRuntimeConfigNuxtRobots } from '../composables/useRuntimeConfigNuxtRobots'
-import { resolveRobotsTxtContext } from '../util'
+import { normalizeRobotsContext } from '../util'
 
 export default defineEventHandler(async (e) => {
   const nitroApp = useNitroApp()
@@ -26,13 +26,43 @@ export default defineEventHandler(async (e) => {
     ],
   }
   if (indexable) {
-    robotsTxtCtx = await resolveRobotsTxtContext(e)
-    // normalise
-    robotsTxtCtx.sitemaps = [...new Set(
-      asArray(robotsTxtCtx.sitemaps)
-        // validate sitemaps are absolute
-        .map(s => !s.startsWith('http') ? withSiteUrl(e, s, { withBase: true, absolute: true }) : s),
-    )]
+    // Start from cached init context (includes robots:init modifications)
+    // or get fresh from runtime config and normalize
+    const { groups, sitemap: sitemaps } = useRuntimeConfigNuxtRobots(e)
+    const baseCtx = nitroApp._robots.ctx
+      ? {
+          groups: nitroApp._robots.ctx.groups,
+          sitemaps: JSON.parse(JSON.stringify(nitroApp._robots.ctx.sitemaps)),
+          errors: [],
+        }
+      : normalizeRobotsContext({ groups, sitemaps, errors: [] })
+
+    // Call robots:robots-txt:input hook
+    const inputCtx: HookRobotsTxtInputContext = {
+      ...baseCtx,
+      event: e,
+    }
+    await nitroApp.hooks.callHook('robots:robots-txt:input', inputCtx)
+
+    // Backwards compatibility: also call deprecated robots:config hook
+    const deprecatedCtx: HookRobotsConfigContext = {
+      ...inputCtx,
+      context: 'robots.txt',
+    }
+    await nitroApp.hooks.callHook('robots:config', deprecatedCtx)
+
+    // Sync changes back and re-normalize
+    inputCtx.groups = deprecatedCtx.groups.map(normalizeGroup)
+    inputCtx.sitemaps = deprecatedCtx.sitemaps
+    inputCtx.errors = deprecatedCtx.errors
+
+    // Update nitro._robots.ctx so getPathRobotConfig can access the latest groups
+    nitroApp._robots.ctx = { ...inputCtx, context: 'robots.txt', event: e }
+
+    robotsTxtCtx = inputCtx
+    // Make sitemaps absolute (already normalized and deduplicated in normalizeRobotsContext)
+    robotsTxtCtx.sitemaps = robotsTxtCtx.sitemaps
+      .map(s => !s.startsWith('http') ? withSiteUrl(e, s, { withBase: true, absolute: true }) : s)
     if (isNuxtContentV2) {
       const contentWithRobotRules = await e.$fetch('/__robots__/nuxt-content.json', {
         headers: {
diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts
index 14cd927d..e6d8fc5f 100644
--- a/src/runtime/server/util.ts
+++ b/src/runtime/server/util.ts
@@ -1,20 +1,22 @@
-import type { H3Event } from 'h3'
-import type { NitroApp } from 'nitropack'
-import type { HookRobotsConfigContext } from '../types'
-import { useNitroApp } from 'nitropack/runtime'
-import { normalizeGroup } from '../../util'
-import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'
+import type { ParsedRobotsTxt, RobotsGroupInput } from '../types'
+import { asArray, normalizeGroup } from '../../util'
 
-export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
-  const { groups, sitemap: sitemaps } = useRuntimeConfigNuxtRobots(e)
-  // make the config writable
-  const generateRobotsTxtCtx: HookRobotsConfigContext = {
-    event: e,
-    context: e ? 'robots.txt' : 'init',
-    ...JSON.parse(JSON.stringify({ groups, sitemaps })),
+/**
+ * Pure normalization function for robots context
+ * - Groups are normalized with _indexable and _rules
+ * - Sitemaps are converted to array, deduplicated, and filtered for valid strings
+ * - Errors are converted to array and filtered for valid strings
+ *
+ * Note: URL absolutization (withSiteUrl) happens separately in robots-txt.ts since it requires H3Event
+ */
+export function normalizeRobotsContext(input: Partial<ParsedRobotsTxt>): ParsedRobotsTxt {
+  return {
+    groups: asArray(input.groups).map(g => normalizeGroup(g as RobotsGroupInput)),
+    sitemaps: [...new Set(
+      asArray(input.sitemaps)
+        .filter(s => typeof s === 'string' && s.trim().length > 0),
+    )],
+    errors: asArray(input.errors)
+      .filter(e => typeof e === 'string' && e.trim().length > 0),
   }
-  await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
-  generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
-  nitro._robots.ctx = generateRobotsTxtCtx
-  return generateRobotsTxtCtx
 }
diff --git a/src/runtime/types.ts b/src/runtime/types.ts
index c56c7f7c..c9fe01d1 100644
--- a/src/runtime/types.ts
+++ b/src/runtime/types.ts
@@ -92,6 +92,21 @@ export interface HookRobotsConfigContext extends ParsedRobotsTxt {
   context: 'robots.txt' | 'init'
 }
 
+/**
+ * Hook context for robots:init
+ * Called once during Nitro initialization
+ */
+export interface HookRobotsInitContext extends ParsedRobotsTxt {
+}
+
+/**
+ * Hook context for robots:robots-txt:input
+ * Called on each robots.txt request
+ */
+export interface HookRobotsTxtInputContext extends ParsedRobotsTxt {
+  event: H3Event
+}
+
 // Bot Detection Types
 export interface BotDetectionContext {
   isBot: boolean
diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
index 70d38b8c..19b8f541 100644
--- a/test/e2e/hook-config.test.ts
+++ b/test/e2e/hook-config.test.ts
@@ -1,40 +1,42 @@
 import { createResolver } from '@nuxt/kit'
-import { setup } from '@nuxt/test-utils'
+import { $fetch, fetch, setup } from '@nuxt/test-utils'
 import { describe, expect, it } from 'vitest'
 
 const { resolve } = createResolver(import.meta.url)
 
 process.env.NODE_ENV = 'production'
 
-describe('robots:config hook - issue #233', async () => {
+describe('hook system (robots:robots-txt:input)', async () => {
   await setup({
-    rootDir: resolve('../../.playground'),
+    rootDir: resolve('../fixtures/hook-config'),
     build: true,
     server: true,
     nuxtConfig: {
       nitro: {
-        plugins: [],
-      },
-      hooks: {
-        'nitro:config': function (nitroConfig: any) {
-          nitroConfig.plugins = nitroConfig.plugins || []
-          nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
-        },
+        plugins: ['plugins/robots.ts'],
       },
     },
   })
 
-  it('generates robots.txt with groups from hook', async () => {
+  it('robots:robots-txt:input hook is called and can add groups', async () => {
     const robotsTxt = await $fetch('/robots.txt')
+    // Should include groups added via robots:robots-txt:input hook
     expect(robotsTxt).toContain('Disallow: /_cwa/*')
     expect(robotsTxt).toContain('AhrefsBot')
   })
 
+  it('robots:robots-txt:input hook receives normalized groups', async () => {
+    // Groups should be normalized with _indexable property
+    // Pages that don't match disallow patterns should be indexable
+    const response = await fetch('/')
+    expect(response.headers.get('x-robots-tag')).toContain('index')
+  })
+
   it('should NOT block indexable pages when groups are added via hook', async () => {
     // This test demonstrates the bug: pages that should be indexable
     // are incorrectly marked as non-indexable because groups added via
     // the hook are missing the _indexable property
-    const { headers: indexHeaders } = await $fetch.raw('/', {
+    const indexResponse = await fetch('/', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
       },
@@ -43,58 +45,56 @@ describe('robots:config hook - issue #233', async () => {
     // This page should NOT have noindex header because:
     // 1. The disallow rule is for /_cwa/* which doesn't match /
     // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
-    expect(indexHeaders.get('x-robots-tag')).toContain('index')
-    expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
+    expect(indexResponse.headers.get('x-robots-tag')).toContain('index')
+    expect(indexResponse.headers.get('x-robots-tag')).not.toContain('noindex')
   })
 
   it('should correctly block paths matching disallow patterns', async () => {
     // This should be blocked by the /_cwa/* rule even though page doesn't exist
    // We test with ignoreResponseError to capture headers from 404 responses
-    const { headers } = await $fetch.raw('/_cwa/test', {
+    const response = await fetch('/_cwa/test', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
       },
-      ignoreResponseError: true,
     })
 
-    expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+    expect(response.headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
   })
 
   it('should block AhrefsBot from all paths', async () => {
-    const { headers: indexHeaders } = await $fetch.raw('/', {
+    const indexResponse = await fetch('/', {
       headers: {
         'User-Agent': 'AhrefsBot',
       },
     })
 
     // AhrefsBot should be blocked everywhere
-    expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+    expect(indexResponse.headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
   })
 
   // Edge case: Multiple hook calls shouldn't cause issues
   it('should handle multiple hook calls without breaking normalization', async () => {
     // Second request - the hook might be called again depending on caching
-    const { headers } = await $fetch.raw('/api/test', {
+    const response = await fetch('/api/test', {
       headers: {
         'User-Agent': 'Mozilla/5.0',
       },
-      ignoreResponseError: true,
     })
 
     // Should still work correctly on subsequent requests
-    expect(headers.get('x-robots-tag')).toBeDefined()
+    expect(response.headers.get('x-robots-tag')).toBeDefined()
   })
 
   // Edge case: Empty user agent header
   it('should handle requests with no user agent gracefully', async () => {
-    const { headers } = await $fetch.raw('/', {
+    const response = await fetch('/', {
       headers: {
         // No User-Agent header
       },
     })
 
     // Should still apply rules (defaults to * user agent)
-    expect(headers.get('x-robots-tag')).toBeDefined()
+    expect(response.headers.get('x-robots-tag')).toBeDefined()
   })
 
   // Edge case: Case sensitivity in user agent matching
@@ -106,14 +106,14 @@ describe('robots:config hook - issue #233', async () => {
     ]
 
     for (const { ua } of tests) {
-      const { headers } = await $fetch.raw('/', {
+      const response = await fetch('/', {
        headers: {
          'User-Agent': ua,
        },
      })
 
      // User agent matching should be case-insensitive
-      expect(headers.get('x-robots-tag')).toContain('noindex')
+      expect(response.headers.get('x-robots-tag')).toContain('noindex')
    }
  })
})
diff --git a/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
new file mode 100644
index 00000000..bf9feb57
--- /dev/null
+++ b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
@@ -0,0 +1,56 @@
+import type { NitroApp } from 'nitropack'
+import type { HookRobotsTxtInputContext } from '../../../../../src/runtime/types'
+import { defineNitroPlugin } from 'nitropack/runtime'
+
+export default defineNitroPlugin((nitroApp: NitroApp) => {
+  nitroApp.hooks.hook('robots:robots-txt:input', async (ctx: HookRobotsTxtInputContext) => {
+    // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
+    ctx.groups.push({
+      userAgent: 'EdgeCaseBot1',
+    } as any)
+
+    // Edge case 2: Add group that's already normalized (double normalization test)
+    ctx.groups.push({
+      userAgent: ['EdgeCaseBot2'],
+      disallow: ['/'],
+      allow: [],
+      _indexable: false,
+      _rules: [{ pattern: '/', allow: false }],
+    } as any)
+
+    // Edge case 3: Modify existing groups from config
+    // This tests if normalization preserves modifications
+    if (ctx.groups.length > 0 && ctx.groups[0]) {
+      ctx.groups[0].disallow?.push('/hook-added-path')
+    }
+
+    // Edge case 4: Add group with "/" mixed with other patterns
+    ctx.groups.push({
+      userAgent: ['EdgeCaseBot3'],
+      disallow: ['/admin', '/', '/api'],
+    } as any)
+
+    // Edge case 5: Add group with non-array values (tests asArray conversion)
+    ctx.groups.push({
+      userAgent: 'EdgeCaseBot4',
+      disallow: '/single-string-disallow',
+      allow: '/single-string-allow',
+    } as any)
+
+    // Edge case 6: Add group with special characters and whitespace
+    ctx.groups.push({
+      userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'],
+      disallow: [' /path-with-spaces ', '/normal'],
+    } as any)
+
+    // Edge case 7: Completely remove groups (extreme case)
+    // Commented out because it would break robots.txt generation
+    // ctx.groups = []
+
+    // Edge case 8: Add duplicate user agents
+    ctx.groups.push({
+      userAgent: ['*'], // Duplicate of default
+      disallow: ['/duplicate-test'],
+    } as any)
+  })
+})
diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
new file mode 100644
index 00000000..5bc22690
--- /dev/null
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -0,0 +1,12 @@
+import NuxtRobots from '../../../src/module'
+
+export default defineNuxtConfig({
+  modules: [NuxtRobots],
+  compatibilityDate: '2024-04-03',
+  site: {
+    url: 'https://example.com',
+  },
+  nitro: {
+    plugins: ['plugins/robots.ts'],
+  },
+})
diff --git a/test/fixtures/hook-config/pages/about.vue b/test/fixtures/hook-config/pages/about.vue
new file mode 100644
index 00000000..6f769cba
--- /dev/null
+++ b/test/fixtures/hook-config/pages/about.vue
@@ -0,0 +1,3 @@
+
diff --git a/test/fixtures/hook-config/pages/index.vue b/test/fixtures/hook-config/pages/index.vue
new file mode 100644
index 00000000..77b1b733
--- /dev/null
+++ b/test/fixtures/hook-config/pages/index.vue
@@ -0,0 +1,3 @@
+
diff --git a/test/fixtures/hook-config/server/plugins/robots.ts b/test/fixtures/hook-config/server/plugins/robots.ts
new file mode 100644
index 00000000..a3abb6c5
--- /dev/null
+++ b/test/fixtures/hook-config/server/plugins/robots.ts
@@ -0,0 +1,23 @@
+import type { NitroApp } from 'nitropack'
+import type { HookRobotsTxtInputContext } from '../../../../../src/runtime/types'
+import { defineNitroPlugin } from 'nitropack/runtime'
+
+export default defineNitroPlugin((nitroApp: NitroApp) => {
+  // Test the new robots:robots-txt:input hook
+  nitroApp.hooks.hook('robots:robots-txt:input', async (ctx: HookRobotsTxtInputContext) => {
+    // Add groups via the hook
+    ctx.groups.push({
+      userAgent: ['*'],
+      comment: ['Block all from operational endpoints'],
+      allow: [],
+      disallow: ['/_cwa/*'],
+    } as any)
+
+    ctx.groups.push({
+      userAgent: ['AhrefsBot'],
+      comment: ['Block AI crawlers'],
+      allow: [],
+      disallow: ['/'],
+    } as any)
+  })
+})
diff --git a/test/fixtures/hook-config/server/tsconfig.json b/test/fixtures/hook-config/server/tsconfig.json
new file mode 100644
index 00000000..076533d2
--- /dev/null
+++ b/test/fixtures/hook-config/server/tsconfig.json
@@ -0,0 +1,3 @@
+{
+  "extends": "../../../../.playground/.nuxt/tsconfig.server.json"
+}
diff --git a/test/fixtures/hook-config/tsconfig.json b/test/fixtures/hook-config/tsconfig.json
new file mode 100644
index 00000000..be599924
--- /dev/null
+++ b/test/fixtures/hook-config/tsconfig.json
@@ -0,0 +1,3 @@
+{
+  "extends": "../../../.playground/.nuxt/tsconfig.json"
+}
diff --git a/test/unit/normalizeRobotsContext.test.ts b/test/unit/normalizeRobotsContext.test.ts
new file mode 100644
index 00000000..024f5943
--- /dev/null
+++ b/test/unit/normalizeRobotsContext.test.ts
@@ -0,0 +1,137 @@
+import { describe, expect, it } from 'vitest'
+import { normalizeRobotsContext } from '../../src/runtime/server/util'
+
+describe('normalizeRobotsContext', () => {
+  it('should normalize empty input', () => {
+    const result = normalizeRobotsContext({})
+
+    expect(result.groups).toEqual([])
+    expect(result.sitemaps).toEqual([])
+    expect(result.errors).toEqual([])
+  })
+
+  it('should normalize groups with _indexable and _rules', () => {
+    const result = normalizeRobotsContext({
+      groups: [
+        {
+          userAgent: ['*'],
+          disallow: ['/'],
+        } as any,
+      ],
+    })
+
+    expect(result.groups).toHaveLength(1)
+    expect(result.groups[0]).toHaveProperty('_indexable', false)
+    expect(result.groups[0]).toHaveProperty('_rules')
+  })
+
+  it('should convert string sitemap to array', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: '/sitemap.xml' as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml'])
+  })
+
+  it('should keep array sitemaps as array', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['/sitemap.xml', '/sitemap2.xml'],
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should filter out falsy sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['', '/sitemap.xml', null, undefined, '/sitemap2.xml'] as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should filter out whitespace-only sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: [' ', '/sitemap.xml', '\t', '/sitemap2.xml'] as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should deduplicate sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['/sitemap.xml', '/sitemap2.xml', '/sitemap.xml', '/sitemap2.xml'],
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should filter out non-string sitemaps', () => {
+    const result = normalizeRobotsContext({
+      sitemaps: ['/sitemap.xml', 123, { url: '/sitemap' }, '/sitemap2.xml'] as any,
+    })
+
+    expect(result.sitemaps).toEqual(['/sitemap.xml', '/sitemap2.xml'])
+  })
+
+  it('should convert string errors to array', () => {
+    const result = normalizeRobotsContext({
+      errors: 'Something went wrong' as any,
+    })
+
+    expect(result.errors).toEqual(['Something went wrong'])
+  })
+
+  it('should filter out falsy errors', () => {
+    const result = normalizeRobotsContext({
+      errors: ['', 'Error 1', null, undefined, 'Error 2'] as any,
+    })
+
+    expect(result.errors).toEqual(['Error 1', 'Error 2'])
+  })
+
+  it('should filter out whitespace-only errors', () => {
+    const result = normalizeRobotsContext({
+      errors: [' ', 'Error 1', '\n\t', 'Error 2'] as any,
+    })
+
+    expect(result.errors).toEqual(['Error 1', 'Error 2'])
+  })
+
+  it('should filter out non-string errors', () => {
+    const result = normalizeRobotsContext({
+      errors: ['Error 1', 123, { message: 'error' }, 'Error 2'] as any,
+    })
+
+    expect(result.errors).toEqual(['Error 1', 'Error 2'])
+  })
+
+  it('should handle undefined values gracefully', () => {
+    const result = normalizeRobotsContext({
+      groups: undefined,
+      sitemaps: undefined,
+      errors: undefined,
+    })
+
+    expect(result.groups).toEqual([])
+    expect(result.sitemaps).toEqual([])
+    expect(result.errors).toEqual([])
+  })
+
+  it('should normalize complete input', () => {
+    const result = normalizeRobotsContext({
+      groups: [
+        {
+          userAgent: ['Googlebot'],
+          disallow: ['/admin'],
+        } as any,
+      ],
+      sitemaps: ['/sitemap.xml'],
+      errors: ['Warning: something'],
+    })
+
+    expect(result.groups).toHaveLength(1)
+    expect(result.groups[0]?._indexable).toBe(true)
+    expect(result.sitemaps).toEqual(['/sitemap.xml'])
+    expect(result.errors).toEqual(['Warning: something'])
+  })
+})