Skip to content

Commit eb7c47a

Browse files
committed
refactor: streamline domain handling by consolidating normalization and registrability checks
1 parent 5e3fa8c commit eb7c47a

File tree

6 files changed

+122
-91
lines changed

6 files changed

+122
-91
lines changed

app/[domain]/page.tsx

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import type { Metadata } from "next";
33
import { notFound, redirect } from "next/navigation";
44
import { DomainReportView } from "@/components/domain/domain-report-view";
55
import { analytics } from "@/lib/analytics/server";
6-
import { normalizeDomainInput } from "@/lib/domain";
76
import { toRegistrableDomain } from "@/lib/domain-server";
87
import { getQueryClient, trpc } from "@/trpc/server";
98

@@ -17,10 +16,9 @@ export async function generateMetadata({
1716
}): Promise<Metadata> {
1817
const { domain: raw } = await params;
1918
const decoded = decodeURIComponent(raw);
20-
const normalized = normalizeDomainInput(decoded);
2119

22-
const isRegistrable = toRegistrableDomain(normalized);
23-
if (!isRegistrable) {
20+
const registrable = toRegistrableDomain(decoded);
21+
if (!registrable) {
2422
// workaround, should match metadata from not-found.tsx
2523
return {
2624
title: "Not Found",
@@ -30,11 +28,11 @@ export async function generateMetadata({
3028

3129
return {
3230
title: {
33-
absolute: `${normalized} — Domain Report`,
31+
absolute: `${registrable} — Domain Report`,
3432
},
35-
description: `Domainstack report for ${normalized}: WHOIS lookup, DNS & SSL scan, HTTP headers, hosting & email provider data, and SEO metadata.`,
33+
description: `Domainstack report for ${registrable}: WHOIS lookup, DNS & SSL scan, HTTP headers, hosting & email provider data, and SEO metadata.`,
3634
alternates: {
37-
canonical: `/${normalized}`,
35+
canonical: `/${registrable}`,
3836
},
3937
};
4038
}
@@ -46,30 +44,29 @@ export default async function DomainPage({
4644
}) {
4745
const { domain: raw } = await params;
4846
const decoded = decodeURIComponent(raw);
49-
const normalized = normalizeDomainInput(decoded);
5047

51-
const isRegistrable = toRegistrableDomain(normalized);
52-
if (!isRegistrable) notFound();
48+
const registrable = toRegistrableDomain(decoded);
49+
if (!registrable) notFound();
5350

54-
// Canonicalize URL to the normalized domain (middleware should already handle most cases)
55-
if (normalized !== decoded) {
56-
redirect(`/${encodeURIComponent(normalized)}`);
51+
// Canonicalize URL to the registrable domain (middleware should already handle most cases)
52+
if (registrable !== decoded) {
53+
redirect(`/${encodeURIComponent(registrable)}`);
5754
}
5855

5956
// Track server-side page view
60-
analytics.track("report_viewed", { domain: normalized });
57+
analytics.track("report_viewed", { domain: registrable });
6158

6259
// Minimal prefetch: registration only, let sections stream progressively
6360
// Use getQueryClient() to ensure consistent query client across the request
6461
const queryClient = getQueryClient();
6562
void queryClient.prefetchQuery(
66-
trpc.domain.getRegistration.queryOptions({ domain: normalized }),
63+
trpc.domain.getRegistration.queryOptions({ domain: registrable }),
6764
);
6865

6966
return (
7067
<div className="container mx-auto max-w-4xl px-4 py-6">
7168
<HydrationBoundary state={dehydrate(queryClient)}>
72-
<DomainReportView domain={normalized} />
69+
<DomainReportView domain={registrable} />
7370
</HydrationBoundary>
7471
</div>
7572
);

lib/domain-server.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11
import { toRegistrableDomain as toRegistrableDomainRdapper } from "rdapper";
22
import { cache } from "react";
33
import { BLACKLISTED_SUFFIXES } from "@/lib/constants/domain-validation";
4+
import { normalizeDomainInput } from "@/lib/domain";
45

5-
// A simple wrapper around rdapper's toRegistrableDomain that:
6-
// 1. is cached for per-request deduplication
7-
// 2. checks if the domain is blacklisted by BLACKLISTED_SUFFIXES in constants/domain-validation.ts
6+
// A wrapper around rdapper's toRegistrableDomain that:
7+
// 1. normalizes user input (strips schemes, paths, ports, auth, www., etc.)
8+
// 2. is cached for per-request deduplication
9+
// 3. checks if the domain is blacklisted by BLACKLISTED_SUFFIXES in constants/domain-validation.ts
810
export const toRegistrableDomain = cache(function toRegistrableDomain(
911
input: string,
1012
): string | null {
11-
const value = (input ?? "").trim().toLowerCase();
13+
// First normalize the input to extract a clean hostname
14+
// This handles user input with schemes, paths, ports, auth, trailing dots, www., etc.
15+
const normalized = normalizeDomainInput(input);
16+
if (!normalized) return null;
17+
18+
const value = normalized.trim().toLowerCase();
1219
if (value === "") return null;
1320

1421
// Shortcut: exact suffixes such as ".css.map" that frequently appear

lib/domain.test.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,54 @@ describe("normalizeDomainInput", () => {
2424
"ex-ample.com",
2525
);
2626
});
27+
28+
it("handles malformed protocols (single slash)", () => {
29+
expect(normalizeDomainInput("http:/example.com")).toBe("example.com");
30+
});
31+
32+
it("handles malformed protocols (triple slash)", () => {
33+
expect(normalizeDomainInput("http:///example.com")).toBe("example.com");
34+
});
35+
36+
it("handles malformed protocols (multiple colons)", () => {
37+
expect(normalizeDomainInput("https:::example.com/path")).toBe(
38+
"example.com",
39+
);
40+
});
41+
42+
it("rejects IPv6 literals", () => {
43+
expect(normalizeDomainInput("[::1]")).toBe("");
44+
expect(normalizeDomainInput("[::1]:8080")).toBe("");
45+
expect(normalizeDomainInput("http://[2001:db8::1]/path")).toBe("");
46+
});
47+
48+
it("handles spaces and whitespace", () => {
49+
expect(normalizeDomainInput(" example.com ")).toBe("example.com");
50+
expect(normalizeDomainInput("example.com /path")).toBe("example.com");
51+
});
52+
53+
it("strips www from subdomains", () => {
54+
expect(normalizeDomainInput("www.example.com")).toBe("example.com");
55+
expect(normalizeDomainInput("WWW.EXAMPLE.COM")).toBe("example.com");
56+
});
57+
58+
it("preserves non-www subdomains", () => {
59+
expect(normalizeDomainInput("api.example.com")).toBe("api.example.com");
60+
expect(normalizeDomainInput("sub.domain.example.com")).toBe(
61+
"sub.domain.example.com",
62+
);
63+
});
64+
65+
it("handles query parameters and fragments", () => {
66+
expect(normalizeDomainInput("example.com?query=value")).toBe("example.com");
67+
expect(normalizeDomainInput("example.com#fragment")).toBe("example.com");
68+
expect(normalizeDomainInput("example.com?q=1#frag")).toBe("example.com");
69+
});
70+
71+
it("returns empty string for empty input", () => {
72+
expect(normalizeDomainInput("")).toBe("");
73+
expect(normalizeDomainInput(" ")).toBe("");
74+
});
2775
});
2876

2977
describe("isValidDomain", () => {

lib/domain.ts

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,87 @@
11
// Utilities for handling user-provided domain input
22

3+
// Matches a leading "http" or "https" (case-insensitive) followed by one or more colons/slashes
4+
// Captures the authority (host + userinfo + port)
5+
// This handles malformed protocols like "http:/example.com" or "http:///example.com"
6+
const SCHEME_PREFIX_REGEX = /^https?[:/]+([^/]+)/i;
7+
38
/**
4-
* Normalize arbitrary user input into a bare registrable domain string.
9+
* Normalize arbitrary user input into a bare hostname string.
510
* Accepts values like:
611
* - "example.com"
712
* - "www.example.com."
813
* - "https://example.com/path?x#y"
914
* - "http://user:pass@example.com:8080/"
15+
* - "http:/example.com" (malformed protocol)
1016
* - " EXAMPLE.COM "
11-
* Returns a lowercased hostname without scheme, path, auth, port, or trailing dot.
17+
* Returns a lowercased hostname without scheme, path, auth, port, trailing dot, or www. prefix.
18+
* Returns an empty string for empty input, input containing brackets (IPv6 literals), or when no hostname remains after stripping.
1219
*/
1320
export function normalizeDomainInput(input: string): string {
1421
let value = (input ?? "").trim();
1522
if (value === "") return "";
1623

17-
// If it looks like a URL (has a scheme), use URL parsing
18-
const hasScheme = /:\/\//.test(value);
19-
if (hasScheme) {
24+
// Reject IPv6 literals early (e.g., "[::1]", "[::1]:8080")
25+
// These are not supported and would cause issues in URL parsing
26+
if (value.includes("[") || value.includes("]")) {
27+
return "";
28+
}
29+
30+
// Try to extract authority (host) from scheme-prefixed input
31+
// This handles both valid and malformed protocols
32+
const schemeMatch = value.match(SCHEME_PREFIX_REGEX);
33+
if (schemeMatch) {
34+
// Extract authority from the scheme match
35+
value = schemeMatch[1];
36+
} else if (/:\/\//.test(value)) {
37+
// Has scheme-like pattern but didn't match our regex (e.g., "fake+scheme://...")
38+
// Try URL parsing first
2039
try {
2140
const url = new URL(value);
22-
// URL applies IDNA (punycode) and strips auth/port/path for hostname
2341
value = url.hostname;
2442
} catch {
25-
// If invalid URL with scheme, strip leading scheme-like prefix manually
43+
// Fallback: strip scheme-like prefix manually
2644
value = value.replace(/^\w+:\/\//, "");
27-
// Remove credentials if present
28-
value = value.replace(/^[^@]+@/, "");
29-
// Remove path/query/fragment
30-
value = value.split("/")[0].split("?")[0].split("#")[0];
3145
}
3246
} else {
33-
// No scheme: try URL parsing with implicit http:// to get punycoded hostname
47+
// No scheme detected: try URL parsing with implicit http:// to get punycoded hostname
3448
try {
3549
const url = new URL(`http://${value}`);
3650
value = url.hostname;
3751
} catch {
38-
// Fallback: remove any credentials, path, query, or fragment accidentally included
39-
value = value.replace(/^[^@]+@/, "");
40-
value = value.split("/")[0].split("?")[0].split("#")[0];
52+
// Fallback: leave value unchanged; the stripping steps below treat it as a raw authority
4153
}
4254
}
4355

44-
// Strip port if present
45-
value = value.replace(/:\d+$/, "");
56+
// Strip query and fragment (in case they weren't already removed)
57+
value = value.split(/[?#]/)[0];
58+
59+
// Strip User Info (credentials)
60+
const atIndex = value.lastIndexOf("@");
61+
if (atIndex !== -1) {
62+
value = value.slice(atIndex + 1);
63+
}
64+
65+
// Strip port
66+
value = value.split(":")[0];
67+
68+
// Remove any path components that might remain
69+
value = value.split("/")[0];
4670

4771
// Strip trailing dot
4872
value = value.replace(/\.$/, "");
4973

74+
// Trim any remaining whitespace
75+
value = value.trim();
76+
5077
// Remove common leading www.
5178
value = value.replace(/^www\./i, "");
5279

5380
return value.toLowerCase();
5481
}
5582

5683
/**
57-
* Basic domain validity check (hostname-like), not performing DNS or RDAP.
84+
* An even more basic domain validity check (hostname-like), not performing DNS or RDAP.
5885
*/
5986
export function isValidDomain(value: string): boolean {
6087
const v = (value ?? "").trim();

lib/middleware.ts

Lines changed: 4 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@ import type { NextRequest } from "next/server";
22
import { NextResponse } from "next/server";
33
import { toRegistrableDomain } from "@/lib/domain-server";
44

5-
// Matches beginning "http:" or "https:" followed by any number of slashes/colons
6-
// Captures the authority (host + userinfo + port)
7-
export const SCHEME_PREFIX_REGEX = /^https?[:/]+([^/]+)/i;
8-
95
export type ProxyAction =
106
| { type: "match" }
117
| { type: "redirect"; destination: string }
@@ -40,57 +36,15 @@ export function getProxyAction(path: string): ProxyAction {
4036
// ignore decoding failures
4137
}
4238

43-
let candidate = decodedInput;
44-
45-
// 3. Extract authority (host) candidate
46-
// If scheme present, extract authority from it.
47-
// Otherwise, treat the whole string as potential authority start.
48-
const schemeMatch = candidate.match(SCHEME_PREFIX_REGEX);
49-
let authority = schemeMatch ? schemeMatch[1] : candidate;
50-
51-
// 4. Cleanup: Strip query, fragment, path (if not already stripped by regex)
52-
// Note: Regex above stops at first slash, so path is already gone if scheme matched.
53-
// If scheme didn't match, we manually strip path.
54-
if (!schemeMatch) {
55-
authority = authority.split("/")[0];
56-
}
57-
58-
// Strip query and fragment (order doesn't matter as we take the first occurrence of either)
59-
authority = authority.split(/[?#]/)[0];
60-
61-
authority = authority.trim();
62-
63-
// 5. Strip User Info
64-
const atIndex = authority.lastIndexOf("@");
65-
if (atIndex !== -1) {
66-
authority = authority.slice(atIndex + 1);
67-
}
68-
69-
// 6. Strip Port
70-
// IPv6 literals in brackets (e.g. [::1]) are not supported.
71-
if (authority.includes("[") || authority.includes("]")) {
72-
return null;
73-
}
74-
75-
// Safe to split on colon as valid domains don't contain colons
76-
authority = authority.split(":")[0];
77-
78-
candidate = authority.trim();
79-
80-
if (!candidate) {
81-
return null;
82-
}
83-
84-
// 7. Validate and Normalize
85-
// This will return null for invalid domains, including IPs if rdapper handles them as such.
86-
const registrable = toRegistrableDomain(candidate);
39+
// 3. Validate and extract the registrable domain
40+
const registrable = toRegistrableDomain(decodedInput);
8741
if (!registrable) {
8842
return null;
8943
}
9044

91-
// 8. Redirect if necessary
45+
// 4. Redirect if necessary
9246
// We compare the originally decoded input against the final canonical domain.
93-
// Any difference (path, query, scheme, case, whitespace, userinfo, port) triggers a redirect.
47+
// Any difference (path, query, scheme, case, whitespace, userinfo, port, subdomain) triggers a redirect.
9448
if (decodedInput !== registrable) {
9549
return {
9650
type: "redirect",

server/routers/domain.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { TRPCError } from "@trpc/server";
22
import z from "zod";
3-
import { normalizeDomainInput } from "@/lib/domain";
43
import { toRegistrableDomain } from "@/lib/domain-server";
54
import {
65
BlobUrlResponseSchema,
@@ -30,8 +29,7 @@ import {
3029
const DomainInputSchema = z
3130
.object({ domain: z.string().min(1) })
3231
.transform(({ domain }) => {
33-
const normalized = normalizeDomainInput(domain);
34-
const registrable = toRegistrableDomain(normalized);
32+
const registrable = toRegistrableDomain(domain);
3533
if (!registrable) {
3634
throw new TRPCError({
3735
code: "BAD_REQUEST",

0 commit comments

Comments
 (0)