From 08061f215fbd28ef1a883849e93dc62a8c8aba2d Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 30 Oct 2025 13:48:40 +1000 Subject: [PATCH 01/20] blahhhhhhhhh --- src/content.config.ts | 13 + src/content/docs/guides/.gitignore | 1 + src/content/docs/guides/_cli.md | 45 +++ src/content/docs/guides/cli.md | 617 ----------------------------- src/fetchReadme.ts | 92 +++++ 5 files changed, 151 insertions(+), 617 deletions(-) create mode 100644 src/content/docs/guides/.gitignore create mode 100644 src/content/docs/guides/_cli.md delete mode 100644 src/content/docs/guides/cli.md create mode 100644 src/fetchReadme.ts diff --git a/src/content.config.ts b/src/content.config.ts index a3a3642..3e3fd8f 100644 --- a/src/content.config.ts +++ b/src/content.config.ts @@ -1,6 +1,19 @@ import { defineCollection } from "astro:content"; import { docsLoader } from "@astrojs/starlight/loaders"; import { docsSchema } from "@astrojs/starlight/schema"; +import { generateCliOptionsMarkdown } from "./fetchReadme"; +import { writeFileSync, readFileSync, rmSync } from "node:fs"; + +const docTemplateFile = "src/content/docs/guides/_cli.md"; +const docOutputFile = docTemplateFile.replace('_', ''); + +rmSync(docOutputFile, { force: true }); +const usageText = await generateCliOptionsMarkdown(); + +const docTemplateText = readFileSync(docTemplateFile, "utf-8"); +const docOutput = docTemplateText.replace('README-OPTIONS-PLACEHOLDER', usageText); +writeFileSync(docOutputFile, docOutput); + export const collections = { docs: defineCollection({ loader: docsLoader(), schema: docsSchema() }), diff --git a/src/content/docs/guides/.gitignore b/src/content/docs/guides/.gitignore new file mode 100644 index 0000000..e0d911b --- /dev/null +++ b/src/content/docs/guides/.gitignore @@ -0,0 +1 @@ +/cli.md diff --git a/src/content/docs/guides/_cli.md b/src/content/docs/guides/_cli.md new file mode 100644 index 0000000..9430230 --- /dev/null +++ b/src/content/docs/guides/_cli.md @@ -0,0 +1,45 @@ +--- +title: CLI +--- + + +README-OPTIONS-PLACEHOLDER + +## Repeating Options + +Options can be specified multiple times. This is true for: + +- `--exclude` +- `--exclude-path` +- `--header` +- `--include` +- `--remap` +- `--scheme` + +Here is an example: + +```bash +lychee --exclude https://example.com --exclude https://example.org README.md +``` + +There is a shorthand where you can specify multiple arguments in one go. + +Instead of writing this: + +```bash +lychee --scheme http --scheme file https://example.com +``` + +You can also write this: + +```bash +lychee --scheme http file -- https://example.com +``` + +:::caution[Attention] +If you use the shorthand notation you need to separate the options from the inputs with `--`. +Otherwise, the options will be interpreted as inputs! +::: diff --git a/src/content/docs/guides/cli.md b/src/content/docs/guides/cli.md deleted file mode 100644 index 7dd4eeb..0000000 --- a/src/content/docs/guides/cli.md +++ /dev/null @@ -1,617 +0,0 @@ ---- -title: CLI ---- - -## Usage - -```bash -lychee [OPTIONS] ... -``` - -### Arguments - -**`...`** - -The inputs (where to get links to check from). These can be: - -- Files (e.g. `README.md`) -- File globs (e.g. `"~/git/*/README.md"`) -- Remote URLs (e.g. `https://example.com/README.md`) -- Standard input (`-`) - -:::note -Use `--` to separate inputs from options that allow multiple arguments. -::: - -## General Options - -### `--config` / `-c` - -Configuration file to use. - -**Default:** `lychee.toml` - -```bash -lychee --config custom-config.toml -``` - -### `--verbose` / `-v` - -Set verbosity level; more output per occurrence (e.g. `-v` or `-vv`). - -```bash -lychee -vv README.md -``` - -### `--quiet` / `-q` - -Less output per occurrence (e.g. `-q` or `-qq`). - -```bash -lychee -qq README.md -``` - -### `--no-progress` / `-n` - -Do not show progress bar. This is recommended for non-interactive shells (e.g. for continuous integration). - -```bash -lychee --no-progress README.md -``` - -### `--help` / `-h` - -Print help information (use `-h` for a summary). - -### `--version` / `-V` - -Print version information. - -## Input Options - -### `--extensions` - -Test the specified file extensions for URIs when checking files locally. - -Multiple extensions can be separated by commas. Note that if you want to check filetypes which have multiple extensions, e.g. HTML files with both `.html` and `.htm` extensions, you need to specify both extensions explicitly. - -**Default:** `md,mkd,mdx,mdown,mdwn,mkdn,mkdown,markdown,html,htm,txt` - -```bash -lychee --extensions md,html,txt -``` - -### `--skip-missing` - -Skip missing input files (default is to error if they don't exist). - -```bash -lychee --skip-missing file1.md file2.md -``` - -### `--no-ignore` - -Do not skip files that would otherwise be ignored by `.gitignore`, `.ignore`, or the global ignore file. - -```bash -lychee --no-ignore . -``` - -### `--hidden` - -Do not skip hidden directories and files. - -```bash -lychee --hidden . -``` - -### `--glob-ignore-case` - -Ignore case when expanding filesystem path glob inputs. - -```bash -lychee --glob-ignore-case "**/*.MD" -``` - -### `--dump` - -Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked. - -```bash -lychee --dump README.md -``` - -### `--dump-inputs` - -Don't perform any link extraction and checking. Instead, dump all input sources from which links would be collected. - -```bash -lychee --dump-inputs "docs/**/*.md" -``` - -## Caching Options - -### `--cache` - -Use request cache stored on disk at `.lycheecache`. - -```bash -lychee --cache README.md -``` - -### `--max-cache-age` - -Discard all cached requests older than this duration. - -**Default:** `1d` - -```bash -lychee --cache --max-cache-age 7d README.md -``` - -### `--cache-exclude-status` - -A list of status codes that will be ignored from the cache. - -The following exclude range syntax is supported: `[start]..[[=]end]|code` - -Valid examples: - -- `429` (excludes the 429 status code only) -- `500..` (excludes any status code >= 500) -- `..100` (excludes any status code < 100) -- `500..=599` (excludes any status code from 500 to 599 inclusive) -- `500..600` (excludes any status code from 500 to 600 excluding 600, same as 500..=599) - -```bash -lychee --cache --cache-exclude-status '429, 500..502' README.md -``` - -## Network Options - -### `--max-redirects` / `-m` - -Maximum number of allowed redirects. - -**Default:** `5` - -```bash -lychee --max-redirects 10 README.md -``` - -### `--max-retries` - -Maximum number of retries per request. - -**Default:** `3` - -```bash -lychee --max-retries 5 README.md -``` - -### `--retry-wait-time` / `-r` - -Minimum wait time in seconds between retries of failed requests. - -**Default:** `1` - -```bash -lychee --retry-wait-time 5 README.md -``` - -### `--timeout` / `-t` - -Website timeout in seconds from connect to response finished. - -**Default:** `20` - -```bash -lychee --timeout 30 README.md -``` - -### `--max-concurrency` - -Maximum number of concurrent network requests. - -**Default:** `128` - -```bash -lychee --max-concurrency 64 README.md -``` - -### `--threads` / `-T` - -Number of threads to utilize. Defaults to number of cores available to the system. - -```bash -lychee --threads 4 README.md -``` - -### `--user-agent` / `-u` - -User agent string to use for requests. - -**Default:** `lychee/0.20.1` - -```bash -lychee --user-agent "Mozilla/5.0" README.md -``` - -### `--insecure` / `-i` - -Proceed for server connections considered insecure (invalid TLS). - -```bash -lychee --insecure README.md -``` - -### `--min-tls` - -Minimum accepted TLS Version. - -**Possible values:** `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, `TLSv1_3` - -```bash -lychee --min-tls TLSv1_2 README.md -``` - -### `--method` / `-X` - -Request method to use. - -**Default:** `get` - -```bash -lychee --method head README.md -``` - -### `--offline` - -Only check local files and block network requests. - -```bash -lychee --offline README.md -``` - -## Authentication Options - -### `--header` / `-H` - -Set custom header for requests. - -Some websites require custom headers to be passed in order to return valid responses. You can specify custom headers in the format `'Name: Value'`. For example, `'Accept: text/html'`. This is the same format that other tools like curl or wget use. Multiple headers can be specified by using the flag multiple times. - -```bash -lychee --header "Accept: text/html" --header "Authorization: Bearer token" README.md -``` - -### `--basic-auth` - -Basic authentication support. - -Format: `http://example.com username:password` - -```bash -lychee --basic-auth "http://example.com user:pass" README.md -``` - -### `--github-token` - -GitHub API token to use when checking github.com links, to avoid rate limiting. - -**Environment variable:** `GITHUB_TOKEN` - -```bash -lychee --github-token ghp_xxxxxxxxxxxx README.md -# or -export GITHUB_TOKEN=ghp_xxxxxxxxxxxx -lychee README.md -``` - -### `--cookie-jar` - -Tell lychee to read cookies from the given file. Cookies will be stored in the cookie jar and sent with requests. New cookies will be stored in the cookie jar and existing cookies will be updated. - -```bash -lychee --cookie-jar cookies.txt README.md -``` - -## Filter Options - -### `--scheme` / `-s` - -Only test links with the given schemes (e.g. https). Omit to check links with any other scheme. - -Supported schemes: `http`, `https`, `file`, `mailto` - -```bash -lychee --scheme https README.md -lychee --scheme http https file -- README.md -``` - -:::note -If you don't specify any schemes, lychee will check all links regardless of their scheme. Otherwise, it will only check links with the specified schemes. -::: - -### `--include` - -URLs to check (supports regex). Has preference over all excludes. - -```bash -lychee --include "https://example.com.*" README.md -``` - -### `--exclude` - -Exclude URLs and mail addresses from checking. The values are treated as regular expressions. - -```bash -lychee --exclude "https://example.com" --exclude "mailto:.*" README.md -``` - -### `--exclude-path` - -Exclude paths from getting checked. The values are treated as regular expressions. - -```bash -lychee --exclude-path "node_modules" --exclude-path "vendor" . -``` - -### `--exclude-file` - -:::caution[Deprecated] -Use `--exclude-path` instead. -::: - -### `--exclude-all-private` / `-E` - -Exclude all private IPs from checking. Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`. - -```bash -lychee --exclude-all-private README.md -``` - -### `--exclude-private` - -Exclude private IP address ranges from checking. - -```bash -lychee --exclude-private README.md -``` - -### `--exclude-link-local` - -Exclude link-local IP address range from checking. - -```bash -lychee --exclude-link-local README.md -``` - -### `--exclude-loopback` - -Exclude loopback IP address range and localhost from checking. - -```bash -lychee --exclude-loopback README.md -``` - -### `--include-mail` - -Also check email addresses. - -```bash -lychee --include-mail README.md -``` - -### `--include-fragments` - -Enable the checking of fragments in links (e.g., checking if `#section` exists on a page). - -```bash -lychee --include-fragments README.md -``` - -### `--include-verbatim` - -Find links in verbatim sections like `pre`- and `code` blocks. - -```bash -lychee --include-verbatim README.md -``` - -### `--include-wikilinks` - -Check WikiLinks in Markdown files. - -```bash -lychee --include-wikilinks README.md -``` - -## Status Code Options - -### `--accept` / `-a` - -A list of accepted status codes for valid links. - -The following accept range syntax is supported: `[start]..[[=]end]|code` - -Valid examples: - -- `200` (accepts the 200 status code only) -- `..204` (accepts any status code < 204) -- `..=204` (accepts any status code <= 204) -- `200..=204` (accepts any status code from 200 to 204 inclusive) -- `200..205` (accepts any status code from 200 to 205 excluding 205, same as 200..=204) - -**Default:** `100..=103,200..=299` - -```bash -lychee --accept '200..=204, 429, 500' README.md -``` - -### `--require-https` - -When HTTPS is available, treat HTTP links as errors. - -```bash -lychee --require-https README.md -``` - -## URL Transformation Options - -### `--base-url` / `-b` - -Base URL used to resolve relative URLs during link checking. - -```bash -lychee --base-url https://example.com docs/ -``` - -### `--base` - -:::caution[Deprecated] -Use `--base-url` instead. -::: - -### `--root-dir` - -Root path to use when checking absolute local links. Must be an absolute path. - -```bash -lychee --root-dir /home/user/project docs/ -``` - -### `--remap` - -Remap URI matching pattern to different URI. - -```bash -lychee --remap "https://old.example.com https://new.example.com" README.md -``` - -### `--fallback-extensions` - -When checking locally, attempts to locate missing files by trying the given fallback extensions. Multiple extensions can be separated by commas. Extensions will be checked in order of appearance. - -:::note -This option only takes effect on `file://` URIs which do not exist. -::: - -```bash -lychee --fallback-extensions html,htm,php,asp README.md -``` - -### `--index-files` - -When checking locally, resolves directory links to a separate index file. The argument is a comma-separated list of index file names to search for. Index names are relative to the link's directory and attempted in the order given. - -If `--index-files` is specified, then at least one index file must exist in order for a directory link to be considered valid. Additionally, the special name `.` can be used in the list to refer to the directory itself. - -If unspecified (the default behavior), index files are disabled and directory links are considered valid as long as the directory exists. - -:::note -This option only takes effect on `file://` URIs which exist and point to a directory. -::: - -**Examples:** - -```bash -# Looks for index.html or readme.md and requires that at least one exists -lychee --index-files index.html,readme.md docs/ - -# Will use index.html if it exists, but still accept the directory link regardless -lychee --index-files index.html,. docs/ - -# Will reject all directory links because there are no valid index files -lychee --index-files '' docs/ -``` - -## Web Archive Options - -### `--archive` - -Specify the use of a specific web archive. Can be used in combination with `--suggest`. - -**Possible values:** `wayback` - -```bash -lychee --archive wayback --suggest README.md -``` - -### `--suggest` - -Suggest link replacements for broken links, using a web archive. The web archive can be specified with `--archive`. - -```bash -lychee --suggest README.md -``` - -## Output Options - -### `--output` / `-o` - -Output file of status report. - -```bash -lychee --output report.txt README.md -``` - -### `--format` / `-f` - -Output format of final status report. - -**Default:** `compact` - -**Possible values:** `compact`, `detailed`, `json`, `markdown`, `raw` - -```bash -lychee --format json --output report.json README.md -``` - -### `--mode` - -Set the output display mode. Determines how results are presented in the terminal. - -**Default:** `color` - -**Possible values:** `plain`, `color`, `emoji`, `task` - -```bash -lychee --mode emoji README.md -``` - -## Repeating Options - -Options can be specified multiple times. This is true for: - -- `--exclude` -- `--exclude-path` -- `--header` -- `--include` -- `--remap` -- `--scheme` - -Here is an example: - -```bash -lychee --exclude https://example.com --exclude https://example.org README.md -``` - -There is a shorthand where you can specify multiple arguments in one go. - -Instead of writing this: - -```bash -lychee --scheme http --scheme file https://example.com -``` - -You can also write this: - -```bash -lychee --scheme http file -- https://example.com -``` - -:::caution[Attention] -If you use the shorthand notation you need to separate the options from the inputs with `--`. -Otherwise, the options will be interpreted as inputs! -::: diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts new file mode 100644 index 0000000..5be1ff4 --- /dev/null +++ b/src/fetchReadme.ts @@ -0,0 +1,92 @@ +import assert from 'node:assert'; + +const VERSION = "lychee-v0.21.0"; + +// https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md +const url = `https://raw.githubusercontent.com/lycheeverse/lychee/refs/tags/${VERSION}/README.md`; + + +function extractHelpFromReadme(readme: string) { + const [, section] = readme.split(/### Commandline Parameters/, 2); + if (!section) + throw new Error( + "LycheeCliOptions: commandline parameters section not found in readme", + ); + + const [, text] = section.split("\n```help-message\n", 2); + if (!text) + throw new Error( + "LycheeCliOptions: ```help-message marker not found in commandline parameters section", + ); + + const [helpText] = text.split("\n```\n", 2); + if (!helpText) + throw new Error( + "LycheeCliOptions: closing ``` marker not found after ```text", + ); + + return helpText; +} + +// https://stackoverflow.com/a/6234804 +function escapeMarkdown(unsafe: string): string { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); +} + +function splitLines(s: string): string[] { + return s.split(/\r?\n/g); +} + +function* generateMarkdown(lines: string[]) { + const headingRegex = /^\w+:$/; + const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; + const usageRegex = /^Usage: /; + const bodyRegex = /^ (.*)/; + + let match; + for (const line of lines) { + if (line.match(usageRegex)) { + yield '```'; + yield line; + yield '```'; + + } else if (line.match(headingRegex)) { + yield "## " + escapeMarkdown(line.replace(/:$/, '')); + + } else if (match = line.match(optionRegex)) { + // TODO: zero width space......... + const option = escapeMarkdown(match[0]).replace(/-/g, '-​'); + yield `### ${option.trim()}`; + yield ''; + yield '```'; + yield line.trimStart(); + yield '```'; + + } else if (match = line.match(bodyRegex)) { + yield ' ' + match[1]; + + } else { + yield line; + } + } +} + +export async function generateCliOptionsMarkdown() { + const readme = await fetch(url); + assert(readme.ok, `${readme.status} when fetching ${url}`); + + const rawUsageText = extractHelpFromReadme(await readme.text()); + const usageText = [...generateMarkdown(splitLines(rawUsageText))].join("\n"); + + assert(usageText.search('\n## Options\n'), 'options heading missing'); + assert(usageText.search('\n### --dump\n'), '--dump heading missing'); + assert(usageText.search('\n### --root-dir\n'), '--rot-dir heading missing'); + + return usageText; +} + From 7311e8cdf25c283203b7ac6135ca6ade609c1158 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 2 Nov 2025 13:25:47 +1000 Subject: [PATCH 02/20] use custom integration --- astro.config.mjs | 8 ++++++++ src/content.config.ts | 13 ------------- src/fetchReadme.ts | 45 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/astro.config.mjs b/astro.config.mjs index 7c40dda..70b7c01 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -1,10 +1,18 @@ import starlight from "@astrojs/starlight"; import { defineConfig } from "astro/config"; +import { generateCliOptionsIntegration } from "./src/fetchReadme"; +import smartypants from "remark-smartypants"; // https://astro.build/config export default defineConfig({ site: "https://lychee.cli.rs", + markdown: { + remarkPlugins: [ + [smartypants, { dashes: false }] + ] + }, integrations: [ + generateCliOptionsIntegration("src/content/docs/guides/_cli.md"), starlight({ expressiveCode: { themes: ["catppuccin-frappe", "catppuccin-latte"], diff --git a/src/content.config.ts b/src/content.config.ts index 3e3fd8f..a3a3642 100644 --- a/src/content.config.ts +++ b/src/content.config.ts @@ -1,19 +1,6 @@ import { defineCollection } from "astro:content"; import { docsLoader } from "@astrojs/starlight/loaders"; import { docsSchema } from "@astrojs/starlight/schema"; -import { generateCliOptionsMarkdown } from "./fetchReadme"; -import { writeFileSync, readFileSync, rmSync } from "node:fs"; - -const docTemplateFile = "src/content/docs/guides/_cli.md"; -const docOutputFile = docTemplateFile.replace('_', ''); - -rmSync(docOutputFile, { force: true }); -const usageText = await generateCliOptionsMarkdown(); - -const docTemplateText = readFileSync(docTemplateFile, "utf-8"); -const docOutput = docTemplateText.replace('README-OPTIONS-PLACEHOLDER', usageText); -writeFileSync(docOutputFile, docOutput); - export const collections = { docs: defineCollection({ loader: docsLoader(), schema: docsSchema() }), diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 5be1ff4..50cff42 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -1,10 +1,14 @@ +import type { AstroIntegration } from 'astro'; import assert from 'node:assert'; +import { readFileSync, realpathSync, rmSync, writeFileSync } from 'node:fs'; +import { basename, dirname, join } from 'node:path'; const VERSION = "lychee-v0.21.0"; // https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md const url = `https://raw.githubusercontent.com/lycheeverse/lychee/refs/tags/${VERSION}/README.md`; +const TEMPLATE = 'README-OPTIONS-PLACEHOLDER'; function extractHelpFromReadme(readme: string) { const [, section] = readme.split(/### Commandline Parameters/, 2); @@ -59,9 +63,8 @@ function* generateMarkdown(lines: string[]) { yield "## " + escapeMarkdown(line.replace(/:$/, '')); } else if (match = line.match(optionRegex)) { - // TODO: zero width space......... - const option = escapeMarkdown(match[0]).replace(/-/g, '-​'); - yield `### ${option.trim()}`; + const option = escapeMarkdown(match[0]).trim(); + yield `### ${option}`; yield ''; yield '```'; yield line.trimStart(); @@ -83,10 +86,40 @@ export async function generateCliOptionsMarkdown() { const rawUsageText = extractHelpFromReadme(await readme.text()); const usageText = [...generateMarkdown(splitLines(rawUsageText))].join("\n"); - assert(usageText.search('\n## Options\n'), 'options heading missing'); - assert(usageText.search('\n### --dump\n'), '--dump heading missing'); - assert(usageText.search('\n### --root-dir\n'), '--rot-dir heading missing'); + assert(usageText.match('\n## Options\n'), 'options heading missing, check headingRegex'); + assert(usageText.match('\n### --dump\n'), '--dump heading missing, check optionRegex'); + assert(usageText.match('\n### --root-dir\n'), '--root-dir heading missing, check optionRegex'); + assert(usageText.match('\n Inputs for link checking'), 'expected body text missing, check bodyRegex'); return usageText; } + +export function generateCliOptionsIntegration(templatePath: string): AstroIntegration { + const [dir, file] = [dirname(templatePath), basename(templatePath)]; + + const outputPath = join(dir, file.replace('_', '')); + + return { + name: 'lycheeverse:generate-cli-page', + hooks: { + 'astro:config:setup': async ({ logger, addWatchFile }) => { + logger.info("Using template file " + templatePath); + + addWatchFile(realpathSync(templatePath)); + addWatchFile(import.meta.filename); + + logger.info("Fetching from git tag " + VERSION); + rmSync(outputPath, { force: true }); + const usageText = generateCliOptionsMarkdown(); + + const docTemplateText = readFileSync(templatePath, "utf-8"); + const docOutput = docTemplateText.replace(TEMPLATE, await usageText); + + assert(docOutput != docTemplateText, `Placeholder ${TEMPLATE} not found in template file`); + logger.info("Writing output file " + outputPath); + writeFileSync(outputPath, docOutput); + } + } + }; +} From d3d085071696d72aca228c21cec53ad2d1ec2317 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 2 Nov 2025 13:30:10 +1000 Subject: [PATCH 03/20] add message --- astro.config.mjs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/astro.config.mjs b/astro.config.mjs index 70b7c01..ecb8ebf 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -8,6 +8,8 @@ export default defineConfig({ site: "https://lychee.cli.rs", markdown: { remarkPlugins: [ + // automatically converting smart dashes causes problems with cli arguments. + // to insert dashes, use unicode or — or –. [smartypants, { dashes: false }] ] }, From b968bb4b83603c968bf8b228644d121df165d973 Mon Sep 17 00:00:00 2001 From: rina Date: Wed, 5 Nov 2025 19:09:42 +1000 Subject: [PATCH 04/20] fdjsamiofdsajiofdsaijo --- src/content/docs/guides/cli.md | 640 +++++++++++++++++---------------- src/fetchReadme.ts | 14 +- 2 files changed, 349 insertions(+), 305 deletions(-) diff --git a/src/content/docs/guides/cli.md b/src/content/docs/guides/cli.md index 1d552c4..e2def10 100644 --- a/src/content/docs/guides/cli.md +++ b/src/content/docs/guides/cli.md @@ -1,584 +1,620 @@ --- title: CLI --- + -## Usage +lychee is a fast, asynchronous link checker which detects broken URLs and mail addresses in local files and websites. It supports Markdown and HTML and works well with many plain text file formats. + +lychee is powered by lychee-lib, the Rust library for link checking. + +``` +Usage: lychee [OPTIONS] [inputs]... +``` + +## Arguments +### [inputs]... ```bash -lychee [OPTIONS] ... +lychee [inputs]... ``` + Inputs for link checking (where to get links to check from). These can be: + files (e.g. `README.md`), file globs (e.g. `'~/git/*/README.md'`), remote URLs + (e.g. `https://example.com/README.md`), or standard input (`-`). Alternatively, + use `--files-from` to read inputs from a file. -### Arguments + NOTE: Use `--` to separate inputs from options that allow multiple arguments. -**`...`** +## Options +### -a, --accept -The inputs (where to get links to check from). These can be: +```bash +lychee --accept +``` + A List of accepted status codes for valid links -- Files (e.g. `README.md`) -- File globs (e.g. `"~/git/*/README.md"`) -- Remote URLs (e.g. `https://example.com/README.md`) -- Standard input (`-`) + The following accept range syntax is supported: [start]..[[=]end]|code. Some valid + examples are: -:::note -Use `--` to separate inputs from options that allow multiple arguments. -::: + - 200 (accepts the 200 status code only) + - ..204 (accepts any status code < 204) + - ..=204 (accepts any status code <= 204) + - 200..=204 (accepts any status code from 200 to 204 inclusive) + - 200..205 (accepts any status code from 200 to 205 excluding 205, same as 200..=204) -## General Options + Use "lychee --accept '200..=204, 429, 500' ..." to provide a comma- + separated list of accepted status codes. This example will accept 200, 201, + 202, 203, 204, 429, and 500 as valid status codes. -### `--config` / `-c` +**default**: 100..=103,200..=299 -Configuration file to use. -**Default:** `lychee.toml` +### --archive ```bash -lychee --config custom-config.toml +lychee --archive ``` + Specify the use of a specific web archive. Can be used in combination with `--suggest` + +**possible values**: wayback -### `--verbose` / `-v` -Set verbosity level; more output per occurrence (e.g. `-v` or `-vv`). +### -b, --base-url ```bash -lychee -vv README.md +lychee --base-url ``` + Base URL to use when resolving relative URLs in local files. If specified, + relative links in local files are interpreted as being relative to the given + base URL. -### `--quiet` / `-q` + For example, given a base URL of `https://example.com/dir/page`, the link `a` + would resolve to `https://example.com/dir/a` and the link `/b` would resolve + to `https://example.com/b`. This behavior is not affected by the filesystem + path of the file containing these links. -Less output per occurrence (e.g. `-q` or `-qq`). + Note that relative URLs without a leading slash become siblings of the base + URL. If, instead, the base URL ended in a slash, the link would become a child + of the base URL. For example, a base URL of `https://example.com/dir/page/` and + a link of `a` would resolve to `https://example.com/dir/page/a`. -```bash -lychee -qq README.md -``` + Basically, the base URL option resolves links as if the local files were hosted + at the given base URL address. -### `--no-progress` / `-n` + The provided base URL value must either be a URL (with scheme) or an absolute path. + Note that certain URL schemes cannot be used as a base, e.g., `data` and `mailto`. -Do not show progress bar. This is recommended for non-interactive shells (e.g. for continuous integration). +### --base ```bash -lychee --no-progress README.md +lychee --base ``` + Deprecated; use `--base-url` instead -### `--help` / `-h` - -Print help information (use `-h` for a summary). +### --basic-auth -### `--version` / `-V` - -Print version information. +```bash +lychee --basic-auth +``` + Basic authentication support. E.g. `http://example.com username:password` -## Input Options +### -c, --config -### `--extensions` +```bash +lychee --config +``` + Configuration file to use -Test the specified file extensions for URIs when checking files locally. +**default**: lychee.toml -Multiple extensions can be separated by commas. Note that if you want to check filetypes which have multiple extensions, e.g. HTML files with both `.html` and `.htm` extensions, you need to specify both extensions explicitly. -**Default:** `md,mkd,mdx,mdown,mdwn,mkdn,mkdown,markdown,html,htm,txt` +### --cache ```bash -lychee --extensions md,html,txt +lychee --cache ``` + Use request cache stored on disk at `.lycheecache` -### `--skip-missing` - -Skip missing input files (default is to error if they don't exist). +### --cache-exclude-status ```bash -lychee --skip-missing file1.md file2.md +lychee --cache-exclude-status ``` + A list of status codes that will be ignored from the cache -### `--no-ignore` + The following exclude range syntax is supported: [start]..[[=]end]|code. Some valid + examples are: -Do not skip files that would otherwise be ignored by `.gitignore`, `.ignore`, or the global ignore file. + - 429 (excludes the 429 status code only) + - 500.. (excludes any status code >= 500) + - ..100 (excludes any status code < 100) + - 500..=599 (excludes any status code from 500 to 599 inclusive) + - 500..600 (excludes any status code from 500 to 600 excluding 600, same as 500..=599) + + Use "lychee --cache-exclude-status '429, 500..502' ..." to provide a + comma-separated list of excluded status codes. This example will not cache results + with a status code of 429, 500 and 501. + +### --cookie-jar ```bash -lychee --no-ignore . +lychee --cookie-jar ``` + Tell lychee to read cookies from the given file. Cookies will be stored in the + cookie jar and sent with requests. New cookies will be stored in the cookie jar + and existing cookies will be updated. -### `--hidden` - -Do not skip hidden directories and files. +### --default-extension ```bash -lychee --hidden . +lychee --default-extension ``` + This is the default file extension that is applied to files without an extension. -### `--glob-ignore-case` + This is useful for files without extensions or with unknown extensions. The extension will be used to determine the file type for processing. Examples: --default-extension md, --default-extension html -Ignore case when expanding filesystem path glob inputs. +### --dump ```bash -lychee --glob-ignore-case "**/*.MD" +lychee --dump ``` + Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked -### `--dump` - -Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked. +### --dump-inputs ```bash -lychee --dump README.md +lychee --dump-inputs ``` + Don't perform any link extraction and checking. Instead, dump all input sources from which links would be collected -### `--dump-inputs` - -Don't perform any link extraction and checking. Instead, dump all input sources from which links would be collected. +### -E, --exclude-all-private ```bash -lychee --dump-inputs "docs/**/*.md" +lychee --exclude-all-private ``` + Exclude all private IPs from checking. + Equivalent to `--exclude-private --exclude-link-local --exclude-loopback` -## Caching Options +### --exclude -### `--cache` +```bash +lychee --exclude +``` + Exclude URLs and mail addresses from checking. The values are treated as regular expressions -Use request cache stored on disk at `.lycheecache`. +### --exclude-file ```bash -lychee --cache README.md +lychee --exclude-file ``` + Deprecated; use `--exclude-path` instead -### `--max-cache-age` +### --exclude-link-local -Discard all cached requests older than this duration. +```bash +lychee --exclude-link-local +``` + Exclude link-local IP address range from checking -**Default:** `1d` +### --exclude-loopback ```bash -lychee --cache --max-cache-age 7d README.md +lychee --exclude-loopback ``` + Exclude loopback IP address range and localhost from checking -### `--cache-exclude-status` +### --exclude-path -A list of status codes that will be ignored from the cache. +```bash +lychee --exclude-path +``` + Exclude paths from getting checked. The values are treated as regular expressions -The following exclude range syntax is supported: `[start]..[[=]end]|code` +### --exclude-private -Valid examples: +```bash +lychee --exclude-private +``` + Exclude private IP address ranges from checking -- `429` (excludes the 429 status code only) -- `500..` (excludes any status code >= 500) -- `..100` (excludes any status code < 100) -- `500..=599` (excludes any status code from 500 to 599 inclusive) -- `500..600` (excludes any status code from 500 to 600 excluding 600, same as 500..=599) +### --extensions ```bash -lychee --cache --cache-exclude-status '429, 500..502' README.md +lychee --extensions ``` + Test the specified file extensions for URIs when checking files locally. -## Network Options + Multiple extensions can be separated by commas. Note that if you want to check filetypes, + which have multiple extensions, e.g. HTML files with both .html and .htm extensions, you need to + specify both extensions explicitly. -### `--max-redirects` / `-m` +**default**: md,mkd,mdx,mdown,mdwn,mkdn,mkdown,markdown,html,htm,txt -Maximum number of allowed redirects. -**Default:** `5` +### -f, --format ```bash -lychee --max-redirects 10 README.md +lychee --format ``` + Output format of final status report + +**default**: compact -### `--max-retries` +**possible values**: compact, detailed, json, markdown, raw -Maximum number of retries per request. -**Default:** `3` +### --fallback-extensions ```bash -lychee --max-retries 5 README.md +lychee --fallback-extensions ``` + When checking locally, attempts to locate missing files by trying the given + fallback extensions. Multiple extensions can be separated by commas. Extensions + will be checked in order of appearance. -### `--retry-wait-time` / `-r` + Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi -Minimum wait time in seconds between retries of failed requests. + Note: This option takes effect on `file://` URIs which do not exist and on + `file://` URIs pointing to directories which resolve to themself (by the + --index-files logic). -**Default:** `1` +### --files-from ```bash -lychee --retry-wait-time 5 README.md +lychee --files-from ``` + Read input filenames from the given file or stdin (if path is '-'). -### `--timeout` / `-t` + This is useful when you have a large number of inputs that would be + cumbersome to specify on the command line directly. -Website timeout in seconds from connect to response finished. + Examples: + lychee --files-from list.txt + find . -name '*.md' | lychee --files-from - + echo 'README.md' | lychee --files-from - -**Default:** `20` + File Format: + Each line should contain one input (file path, URL, or glob pattern). + Lines starting with '#' are treated as comments and ignored. + Empty lines are also ignored. + +### --generate ```bash -lychee --timeout 30 README.md +lychee --generate ``` + Generate special output (e.g. the man page) instead of performing link checking -### `--max-concurrency` +**possible values**: man -Maximum number of concurrent network requests. -**Default:** `128` +### --github-token ```bash -lychee --max-concurrency 64 README.md +lychee --github-token ``` + GitHub API token to use when checking github.com links, to avoid rate limiting + +**env**: GITHUB_TOKEN -### `--threads` / `-T` -Number of threads to utilize. Defaults to number of cores available to the system. +### --glob-ignore-case ```bash -lychee --threads 4 README.md +lychee --glob-ignore-case ``` + Ignore case when expanding filesystem path glob inputs -### `--user-agent` / `-u` - -User agent string to use for requests. - -**Default:** `lychee/0.20.1` +### -h, --help ```bash -lychee --user-agent "Mozilla/5.0" README.md +lychee --help ``` + Print help (see a summary with '-h') -### `--insecure` / `-i` - -Proceed for server connections considered insecure (invalid TLS). +### -H, --header ```bash -lychee --insecure README.md +lychee --header ``` + Set custom header for requests -### `--min-tls` + Some websites require custom headers to be passed in order to return valid responses. + You can specify custom headers in the format 'Name: Value'. For example, 'Accept: text/html'. + This is the same format that other tools like curl or wget use. + Multiple headers can be specified by using the flag multiple times. -Minimum accepted TLS Version. - -**Possible values:** `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, `TLSv1_3` +### --hidden ```bash -lychee --min-tls TLSv1_2 README.md +lychee --hidden ``` + Do not skip hidden directories and files -### `--method` / `-X` - -Request method to use. - -**Default:** `get` +### -i, --insecure ```bash -lychee --method head README.md +lychee --insecure ``` + Proceed for server connections considered insecure (invalid TLS) -### `--offline` - -Only check local files and block network requests. +### --include ```bash -lychee --offline README.md +lychee --include ``` + URLs to check (supports regex). Has preference over all excludes -## Authentication Options - -### `--header` / `-H` - -Set custom header for requests. - -Some websites require custom headers to be passed in order to return valid responses. You can specify custom headers in the format `'Name: Value'`. For example, `'Accept: text/html'`. This is the same format that other tools like curl or wget use. Multiple headers can be specified by using the flag multiple times. +### --include-fragments ```bash -lychee --header "Accept: text/html" --header "Authorization: Bearer token" README.md +lychee --include-fragments ``` + Enable the checking of fragments in links -### `--basic-auth` +### --include-mail -Basic authentication support. +```bash +lychee --include-mail +``` + Also check email addresses -Format: `http://example.com username:password` +### --include-verbatim ```bash -lychee --basic-auth "http://example.com user:pass" README.md +lychee --include-verbatim ``` + Find links in verbatim sections like `pre`- and `code` blocks -### `--github-token` +### --include-wikilinks -GitHub API token to use when checking github.com links, to avoid rate limiting. +```bash +lychee --include-wikilinks +``` + Check WikiLinks in Markdown files -**Environment variable:** `GITHUB_TOKEN` +### --index-files ```bash -lychee --github-token ghp_xxxxxxxxxxxx README.md -# or -export GITHUB_TOKEN=ghp_xxxxxxxxxxxx -lychee README.md +lychee --index-files ``` + When checking locally, resolves directory links to a separate index file. + The argument is a comma-separated list of index file names to search for. Index + names are relative to the link's directory and attempted in the order given. -### `--cookie-jar` + If `--index-files` is specified, then at least one index file must exist in + order for a directory link to be considered valid. Additionally, the special + name `.` can be used in the list to refer to the directory itself. -Tell lychee to read cookies from the given file. Cookies will be stored in the cookie jar and sent with requests. New cookies will be stored in the cookie jar and existing cookies will be updated. + If unspecified (the default behavior), index files are disabled and directory + links are considered valid as long as the directory exists on disk. -```bash -lychee --cookie-jar cookies.txt README.md -``` + Example 1: `--index-files index.html,readme.md` looks for index.html or readme.md + and requires that at least one exists. -## Filter Options + Example 2: `--index-files index.html,.` will use index.html if it exists, but + still accept the directory link regardless. -### `--scheme` / `-s` + Example 3: `--index-files ''` will reject all directory links because there are + no valid index files. This will require every link to explicitly name + a file. -Only test links with the given schemes (e.g. https). Omit to check links with any other scheme. + Note: This option only takes effect on `file://` URIs which exist and point to a directory. -Supported schemes: `http`, `https`, `file`, `mailto` +### -m, --max-redirects ```bash -lychee --scheme https README.md -lychee --scheme http https file -- README.md +lychee --max-redirects ``` + Maximum number of allowed redirects -:::note -If you don't specify any schemes, lychee will check all links regardless of their scheme. Otherwise, it will only check links with the specified schemes. -::: +**default**: 5 -### `--include` -URLs to check (supports regex). Has preference over all excludes. +### --max-cache-age ```bash -lychee --include "https://example.com.*" README.md +lychee --max-cache-age ``` + Discard all cached requests older than this duration + +**default**: 1d -### `--exclude` -Exclude URLs and mail addresses from checking. The values are treated as regular expressions. +### --max-concurrency ```bash -lychee --exclude "https://example.com" --exclude "mailto:.*" README.md +lychee --max-concurrency ``` + Maximum number of concurrent network requests -### `--exclude-path` +**default**: 128 -Exclude paths from getting checked. The values are treated as regular expressions. + +### --max-retries ```bash -lychee --exclude-path "node_modules" --exclude-path "vendor" . +lychee --max-retries ``` + Maximum number of retries per request -### `--exclude-file` - -:::caution[Deprecated] -Use `--exclude-path` instead. -::: +**default**: 3 -### `--exclude-all-private` / `-E` -Exclude all private IPs from checking. Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`. +### --min-tls ```bash -lychee --exclude-all-private README.md +lychee --min-tls ``` + Minimum accepted TLS Version + +**possible values**: TLSv1_0, TLSv1_1, TLSv1_2, TLSv1_3 -### `--exclude-private` -Exclude private IP address ranges from checking. +### --mode ```bash -lychee --exclude-private README.md +lychee --mode ``` + Set the output display mode. Determines how results are presented in the terminal -### `--exclude-link-local` +**default**: color -Exclude link-local IP address range from checking. - -```bash -lychee --exclude-link-local README.md -``` +**possible values**: plain, color, emoji, task -### `--exclude-loopback` -Exclude loopback IP address range and localhost from checking. +### -n, --no-progress ```bash -lychee --exclude-loopback README.md +lychee --no-progress ``` + Do not show progress bar. + This is recommended for non-interactive shells (e.g. for continuous integration) -### `--include-mail` - -Also check email addresses. +### --no-ignore ```bash -lychee --include-mail README.md +lychee --no-ignore ``` + Do not skip files that would otherwise be ignored by '.gitignore', '.ignore', or the global ignore file -### `--include-fragments` - -Enable the checking of fragments in links (e.g., checking if `#section` exists on a page). +### -o, --output ```bash -lychee --include-fragments README.md +lychee --output ``` + Output file of status report -### `--include-verbatim` - -Find links in verbatim sections like `pre`- and `code` blocks. +### --offline ```bash -lychee --include-verbatim README.md +lychee --offline ``` + Only check local files and block network requests -### `--include-wikilinks` - -Check WikiLinks in Markdown files. +### -q, --quiet... ```bash -lychee --include-wikilinks README.md +lychee --quiet... ``` + Less output per occurrence (e.g. `-q` or `-qq`) -## Status Code Options - -### `--accept` / `-a` +### -r, --retry-wait-time -A list of accepted status codes for valid links. - -The following accept range syntax is supported: `[start]..[[=]end]|code` +```bash +lychee --retry-wait-time +``` + Minimum wait time in seconds between retries of failed requests -Valid examples: +**default**: 1 -- `200` (accepts the 200 status code only) -- `..204` (accepts any status code < 204) -- `..=204` (accepts any status code <= 204) -- `200..=204` (accepts any status code from 200 to 204 inclusive) -- `200..205` (accepts any status code from 200 to 205 excluding 205, same as 200..=204) -**Default:** `100..=103,200..=299` +### --remap ```bash -lychee --accept '200..=204, 429, 500' README.md +lychee --remap ``` + Remap URI matching pattern to different URI -### `--require-https` - -When HTTPS is available, treat HTTP links as errors. +### --require-https ```bash -lychee --require-https README.md +lychee --require-https ``` + When HTTPS is available, treat HTTP links as errors -## URL Transformation Options - -### `--base-url` / `-b` - -Base URL used to resolve relative URLs during link checking. +### --root-dir ```bash -lychee --base-url https://example.com docs/ +lychee --root-dir ``` + Root directory to use when checking absolute links in local files. This option is + required if absolute links appear in local files, otherwise those links will be + flagged as errors. This must be an absolute path (i.e., one beginning with `/`). -### `--base` - -:::caution[Deprecated] -Use `--base-url` instead. -::: + If specified, absolute links in local files are resolved by prefixing the given + root directory to the requested absolute link. For example, with a root-dir of + `/root/dir`, a link to `/page.html` would be resolved to `/root/dir/page.html`. -### `--root-dir` + This option can be specified alongside `--base-url`. If both are given, an + absolute link is resolved by constructing a URL from three parts: the domain + name specified in `--base-url`, followed by the `--root-dir` directory path, + followed by the absolute link's own path. -Root path to use when checking absolute local links. Must be an absolute path. +### -s, --scheme ```bash -lychee --root-dir /home/user/project docs/ +lychee --scheme ``` + Only test links with the given schemes (e.g. https). Omit to check links with + any other scheme. At the moment, we support http, https, file, and mailto. -### `--remap` - -Remap URI matching pattern to different URI. +### --skip-missing ```bash -lychee --remap "https://old.example.com https://new.example.com" README.md +lychee --skip-missing ``` + Skip missing input files (default is to error if they don't exist) -### `--fallback-extensions` - -When checking locally, attempts to locate missing files by trying the given fallback extensions. Multiple extensions can be separated by commas. Extensions will be checked in order of appearance. - -:::note -This option only takes effect on `file://` URIs which do not exist. -::: +### --suggest ```bash -lychee --fallback-extensions html,htm,php,asp README.md +lychee --suggest ``` + Suggest link replacements for broken links, using a web archive. The web archive can be specified with `--archive` -### `--index-files` - -When checking locally, resolves directory links to a separate index file. The argument is a comma-separated list of index file names to search for. Index names are relative to the link's directory and attempted in the order given. - -If `--index-files` is specified, then at least one index file must exist in order for a directory link to be considered valid. Additionally, the special name `.` can be used in the list to refer to the directory itself. - -If unspecified (the default behavior), index files are disabled and directory links are considered valid as long as the directory exists. - -:::note -This option only takes effect on `file://` URIs which exist and point to a directory. -::: - -**Examples:** +### -t, --timeout ```bash -# Looks for index.html or readme.md and requires that at least one exists -lychee --index-files index.html,readme.md docs/ - -# Will use index.html if it exists, but still accept the directory link regardless -lychee --index-files index.html,. docs/ - -# Will reject all directory links because there are no valid index files -lychee --index-files '' docs/ +lychee --timeout ``` + Website timeout in seconds from connect to response finished -## Web Archive Options - -### `--archive` +**default**: 20 -Specify the use of a specific web archive. Can be used in combination with `--suggest`. -**Possible values:** `wayback` +### -T, --threads ```bash -lychee --archive wayback --suggest README.md +lychee --threads ``` + Number of threads to utilize. Defaults to number of cores available to the system -### `--suggest` - -Suggest link replacements for broken links, using a web archive. The web archive can be specified with `--archive`. +### -u, --user-agent ```bash -lychee --suggest README.md +lychee --user-agent ``` + User agent -## Output Options +**default**: lychee/0.20.1 -### `--output` / `-o` -Output file of status report. +### -v, --verbose... ```bash -lychee --output report.txt README.md +lychee --verbose... ``` + Set verbosity level; more output per occurrence (e.g. `-v` or `-vv`) -### `--format` / `-f` - -Output format of final status report. - -**Default:** `compact` - -**Possible values:** `compact`, `detailed`, `json`, `markdown`, `raw` +### -V, --version ```bash -lychee --format json --output report.json README.md +lychee --version ``` + Print version -### `--mode` - -Set the output display mode. Determines how results are presented in the terminal. - -**Default:** `color` - -**Possible values:** `plain`, `color`, `emoji`, `task` +### -X, --method ```bash -lychee --mode emoji README.md +lychee --method ``` + Request method + +**default**: get + ## Repeating Options diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 50cff42..8e81fd6 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -51,6 +51,7 @@ function* generateMarkdown(lines: string[]) { const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; const usageRegex = /^Usage: /; const bodyRegex = /^ (.*)/; + const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; let match; for (const line of lines) { @@ -64,14 +65,21 @@ function* generateMarkdown(lines: string[]) { } else if (match = line.match(optionRegex)) { const option = escapeMarkdown(match[0]).trim(); + const longOption = line.replace(/-[^-],/, ''); yield `### ${option}`; yield ''; - yield '```'; - yield line.trimStart(); + yield '```bash'; + yield `lychee ${longOption.trimStart()}`; yield '```'; } else if (match = line.match(bodyRegex)) { - yield ' ' + match[1]; + const line = match[1]; + if (match = line.match(defaultValuesRegex)) { + yield `**${match[1]}**: ${match[2]}`; + yield ''; + } else { + yield ' ' + line; + } } else { yield line; From 45a3710fbe96a8d492c72fe89fc9094ef7339e55 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 8 Nov 2025 12:25:53 +1000 Subject: [PATCH 05/20] rm cli.md --- src/content/docs/guides/cli.md | 638 --------------------------------- 1 file changed, 638 deletions(-) delete mode 100644 src/content/docs/guides/cli.md diff --git a/src/content/docs/guides/cli.md b/src/content/docs/guides/cli.md deleted file mode 100644 index e2def10..0000000 --- a/src/content/docs/guides/cli.md +++ /dev/null @@ -1,638 +0,0 @@ ---- -title: CLI ---- - - -lychee is a fast, asynchronous link checker which detects broken URLs and mail addresses in local files and websites. It supports Markdown and HTML and works well with many plain text file formats. - -lychee is powered by lychee-lib, the Rust library for link checking. - -``` -Usage: lychee [OPTIONS] [inputs]... -``` - -## Arguments -### [inputs]... - -```bash -lychee [inputs]... -``` - Inputs for link checking (where to get links to check from). These can be: - files (e.g. `README.md`), file globs (e.g. `'~/git/*/README.md'`), remote URLs - (e.g. `https://example.com/README.md`), or standard input (`-`). Alternatively, - use `--files-from` to read inputs from a file. - - NOTE: Use `--` to separate inputs from options that allow multiple arguments. - -## Options -### -a, --accept - -```bash -lychee --accept -``` - A List of accepted status codes for valid links - - The following accept range syntax is supported: [start]..[[=]end]|code. Some valid - examples are: - - - 200 (accepts the 200 status code only) - - ..204 (accepts any status code < 204) - - ..=204 (accepts any status code <= 204) - - 200..=204 (accepts any status code from 200 to 204 inclusive) - - 200..205 (accepts any status code from 200 to 205 excluding 205, same as 200..=204) - - Use "lychee --accept '200..=204, 429, 500' ..." to provide a comma- - separated list of accepted status codes. This example will accept 200, 201, - 202, 203, 204, 429, and 500 as valid status codes. - -**default**: 100..=103,200..=299 - - -### --archive - -```bash -lychee --archive -``` - Specify the use of a specific web archive. Can be used in combination with `--suggest` - -**possible values**: wayback - - -### -b, --base-url - -```bash -lychee --base-url -``` - Base URL to use when resolving relative URLs in local files. If specified, - relative links in local files are interpreted as being relative to the given - base URL. - - For example, given a base URL of `https://example.com/dir/page`, the link `a` - would resolve to `https://example.com/dir/a` and the link `/b` would resolve - to `https://example.com/b`. This behavior is not affected by the filesystem - path of the file containing these links. - - Note that relative URLs without a leading slash become siblings of the base - URL. If, instead, the base URL ended in a slash, the link would become a child - of the base URL. For example, a base URL of `https://example.com/dir/page/` and - a link of `a` would resolve to `https://example.com/dir/page/a`. - - Basically, the base URL option resolves links as if the local files were hosted - at the given base URL address. - - The provided base URL value must either be a URL (with scheme) or an absolute path. - Note that certain URL schemes cannot be used as a base, e.g., `data` and `mailto`. - -### --base - -```bash -lychee --base -``` - Deprecated; use `--base-url` instead - -### --basic-auth - -```bash -lychee --basic-auth -``` - Basic authentication support. E.g. `http://example.com username:password` - -### -c, --config - -```bash -lychee --config -``` - Configuration file to use - -**default**: lychee.toml - - -### --cache - -```bash -lychee --cache -``` - Use request cache stored on disk at `.lycheecache` - -### --cache-exclude-status - -```bash -lychee --cache-exclude-status -``` - A list of status codes that will be ignored from the cache - - The following exclude range syntax is supported: [start]..[[=]end]|code. Some valid - examples are: - - - 429 (excludes the 429 status code only) - - 500.. (excludes any status code >= 500) - - ..100 (excludes any status code < 100) - - 500..=599 (excludes any status code from 500 to 599 inclusive) - - 500..600 (excludes any status code from 500 to 600 excluding 600, same as 500..=599) - - Use "lychee --cache-exclude-status '429, 500..502' ..." to provide a - comma-separated list of excluded status codes. This example will not cache results - with a status code of 429, 500 and 501. - -### --cookie-jar - -```bash -lychee --cookie-jar -``` - Tell lychee to read cookies from the given file. Cookies will be stored in the - cookie jar and sent with requests. New cookies will be stored in the cookie jar - and existing cookies will be updated. - -### --default-extension - -```bash -lychee --default-extension -``` - This is the default file extension that is applied to files without an extension. - - This is useful for files without extensions or with unknown extensions. The extension will be used to determine the file type for processing. Examples: --default-extension md, --default-extension html - -### --dump - -```bash -lychee --dump -``` - Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked - -### --dump-inputs - -```bash -lychee --dump-inputs -``` - Don't perform any link extraction and checking. Instead, dump all input sources from which links would be collected - -### -E, --exclude-all-private - -```bash -lychee --exclude-all-private -``` - Exclude all private IPs from checking. - Equivalent to `--exclude-private --exclude-link-local --exclude-loopback` - -### --exclude - -```bash -lychee --exclude -``` - Exclude URLs and mail addresses from checking. The values are treated as regular expressions - -### --exclude-file - -```bash -lychee --exclude-file -``` - Deprecated; use `--exclude-path` instead - -### --exclude-link-local - -```bash -lychee --exclude-link-local -``` - Exclude link-local IP address range from checking - -### --exclude-loopback - -```bash -lychee --exclude-loopback -``` - Exclude loopback IP address range and localhost from checking - -### --exclude-path - -```bash -lychee --exclude-path -``` - Exclude paths from getting checked. The values are treated as regular expressions - -### --exclude-private - -```bash -lychee --exclude-private -``` - Exclude private IP address ranges from checking - -### --extensions - -```bash -lychee --extensions -``` - Test the specified file extensions for URIs when checking files locally. - - Multiple extensions can be separated by commas. Note that if you want to check filetypes, - which have multiple extensions, e.g. HTML files with both .html and .htm extensions, you need to - specify both extensions explicitly. - -**default**: md,mkd,mdx,mdown,mdwn,mkdn,mkdown,markdown,html,htm,txt - - -### -f, --format - -```bash -lychee --format -``` - Output format of final status report - -**default**: compact - -**possible values**: compact, detailed, json, markdown, raw - - -### --fallback-extensions - -```bash -lychee --fallback-extensions -``` - When checking locally, attempts to locate missing files by trying the given - fallback extensions. Multiple extensions can be separated by commas. Extensions - will be checked in order of appearance. - - Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi - - Note: This option takes effect on `file://` URIs which do not exist and on - `file://` URIs pointing to directories which resolve to themself (by the - --index-files logic). - -### --files-from - -```bash -lychee --files-from -``` - Read input filenames from the given file or stdin (if path is '-'). - - This is useful when you have a large number of inputs that would be - cumbersome to specify on the command line directly. - - Examples: - lychee --files-from list.txt - find . -name '*.md' | lychee --files-from - - echo 'README.md' | lychee --files-from - - - File Format: - Each line should contain one input (file path, URL, or glob pattern). - Lines starting with '#' are treated as comments and ignored. - Empty lines are also ignored. - -### --generate - -```bash -lychee --generate -``` - Generate special output (e.g. the man page) instead of performing link checking - -**possible values**: man - - -### --github-token - -```bash -lychee --github-token -``` - GitHub API token to use when checking github.com links, to avoid rate limiting - -**env**: GITHUB_TOKEN - - -### --glob-ignore-case - -```bash -lychee --glob-ignore-case -``` - Ignore case when expanding filesystem path glob inputs - -### -h, --help - -```bash -lychee --help -``` - Print help (see a summary with '-h') - -### -H, --header - -```bash -lychee --header -``` - Set custom header for requests - - Some websites require custom headers to be passed in order to return valid responses. - You can specify custom headers in the format 'Name: Value'. For example, 'Accept: text/html'. - This is the same format that other tools like curl or wget use. - Multiple headers can be specified by using the flag multiple times. - -### --hidden - -```bash -lychee --hidden -``` - Do not skip hidden directories and files - -### -i, --insecure - -```bash -lychee --insecure -``` - Proceed for server connections considered insecure (invalid TLS) - -### --include - -```bash -lychee --include -``` - URLs to check (supports regex). Has preference over all excludes - -### --include-fragments - -```bash -lychee --include-fragments -``` - Enable the checking of fragments in links - -### --include-mail - -```bash -lychee --include-mail -``` - Also check email addresses - -### --include-verbatim - -```bash -lychee --include-verbatim -``` - Find links in verbatim sections like `pre`- and `code` blocks - -### --include-wikilinks - -```bash -lychee --include-wikilinks -``` - Check WikiLinks in Markdown files - -### --index-files - -```bash -lychee --index-files -``` - When checking locally, resolves directory links to a separate index file. - The argument is a comma-separated list of index file names to search for. Index - names are relative to the link's directory and attempted in the order given. - - If `--index-files` is specified, then at least one index file must exist in - order for a directory link to be considered valid. Additionally, the special - name `.` can be used in the list to refer to the directory itself. - - If unspecified (the default behavior), index files are disabled and directory - links are considered valid as long as the directory exists on disk. - - Example 1: `--index-files index.html,readme.md` looks for index.html or readme.md - and requires that at least one exists. - - Example 2: `--index-files index.html,.` will use index.html if it exists, but - still accept the directory link regardless. - - Example 3: `--index-files ''` will reject all directory links because there are - no valid index files. This will require every link to explicitly name - a file. - - Note: This option only takes effect on `file://` URIs which exist and point to a directory. - -### -m, --max-redirects - -```bash -lychee --max-redirects -``` - Maximum number of allowed redirects - -**default**: 5 - - -### --max-cache-age - -```bash -lychee --max-cache-age -``` - Discard all cached requests older than this duration - -**default**: 1d - - -### --max-concurrency - -```bash -lychee --max-concurrency -``` - Maximum number of concurrent network requests - -**default**: 128 - - -### --max-retries - -```bash -lychee --max-retries -``` - Maximum number of retries per request - -**default**: 3 - - -### --min-tls - -```bash -lychee --min-tls -``` - Minimum accepted TLS Version - -**possible values**: TLSv1_0, TLSv1_1, TLSv1_2, TLSv1_3 - - -### --mode - -```bash -lychee --mode -``` - Set the output display mode. Determines how results are presented in the terminal - -**default**: color - -**possible values**: plain, color, emoji, task - - -### -n, --no-progress - -```bash -lychee --no-progress -``` - Do not show progress bar. - This is recommended for non-interactive shells (e.g. for continuous integration) - -### --no-ignore - -```bash -lychee --no-ignore -``` - Do not skip files that would otherwise be ignored by '.gitignore', '.ignore', or the global ignore file - -### -o, --output - -```bash -lychee --output -``` - Output file of status report - -### --offline - -```bash -lychee --offline -``` - Only check local files and block network requests - -### -q, --quiet... - -```bash -lychee --quiet... -``` - Less output per occurrence (e.g. `-q` or `-qq`) - -### -r, --retry-wait-time - -```bash -lychee --retry-wait-time -``` - Minimum wait time in seconds between retries of failed requests - -**default**: 1 - - -### --remap - -```bash -lychee --remap -``` - Remap URI matching pattern to different URI - -### --require-https - -```bash -lychee --require-https -``` - When HTTPS is available, treat HTTP links as errors - -### --root-dir - -```bash -lychee --root-dir -``` - Root directory to use when checking absolute links in local files. This option is - required if absolute links appear in local files, otherwise those links will be - flagged as errors. This must be an absolute path (i.e., one beginning with `/`). - - If specified, absolute links in local files are resolved by prefixing the given - root directory to the requested absolute link. For example, with a root-dir of - `/root/dir`, a link to `/page.html` would be resolved to `/root/dir/page.html`. - - This option can be specified alongside `--base-url`. If both are given, an - absolute link is resolved by constructing a URL from three parts: the domain - name specified in `--base-url`, followed by the `--root-dir` directory path, - followed by the absolute link's own path. - -### -s, --scheme - -```bash -lychee --scheme -``` - Only test links with the given schemes (e.g. https). Omit to check links with - any other scheme. At the moment, we support http, https, file, and mailto. - -### --skip-missing - -```bash -lychee --skip-missing -``` - Skip missing input files (default is to error if they don't exist) - -### --suggest - -```bash -lychee --suggest -``` - Suggest link replacements for broken links, using a web archive. The web archive can be specified with `--archive` - -### -t, --timeout - -```bash -lychee --timeout -``` - Website timeout in seconds from connect to response finished - -**default**: 20 - - -### -T, --threads - -```bash -lychee --threads -``` - Number of threads to utilize. Defaults to number of cores available to the system - -### -u, --user-agent - -```bash -lychee --user-agent -``` - User agent - -**default**: lychee/0.20.1 - - -### -v, --verbose... - -```bash -lychee --verbose... -``` - Set verbosity level; more output per occurrence (e.g. `-v` or `-vv`) - -### -V, --version - -```bash -lychee --version -``` - Print version - -### -X, --method - -```bash -lychee --method -``` - Request method - -**default**: get - - -## Repeating Options - -Some options can be specified multiple times. This is true for: - -- `--exclude` -- `--exclude-path` -- `--header` -- `--include` -- `--remap` -- `--scheme` - -Here is an example: - -```bash -lychee --exclude https://example.com --exclude https://example.org README.md -``` - -To specify multiple values in this way, the argument flag should be repeated. -Otherwise, the extra values would be treated as link checking inputs. - From 35353c7392d2a53dbd646b8ec1cfd9ac1b6f0c9e Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 8 Nov 2025 13:39:38 +1000 Subject: [PATCH 06/20] trying single code block highlight again. why is it blue??? --- astro.config.mjs | 2 ++ src/CodeMarkerAnchorPlugin.mjs | 49 ++++++++++++++++++++++++++++++++ src/fetchReadme.ts | 52 ++++++++++++---------------------- 3 files changed, 69 insertions(+), 34 deletions(-) create mode 100644 src/CodeMarkerAnchorPlugin.mjs diff --git a/astro.config.mjs b/astro.config.mjs index adbf1b5..f6d39b7 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -2,6 +2,7 @@ import starlight from "@astrojs/starlight"; import { defineConfig } from "astro/config"; import { generateCliOptionsIntegration } from "./src/fetchReadme"; import smartypants from "remark-smartypants"; +import { pluginCodeMarkerAnchors } from "./src/CodeMarkerAnchorPlugin.mjs"; // https://astro.build/config export default defineConfig({ @@ -17,6 +18,7 @@ export default defineConfig({ generateCliOptionsIntegration("src/content/docs/guides/_cli.md"), starlight({ expressiveCode: { + plugins: [pluginCodeMarkerAnchors()], themes: ["catppuccin-frappe", "catppuccin-latte"], }, title: "Docs", diff --git a/src/CodeMarkerAnchorPlugin.mjs b/src/CodeMarkerAnchorPlugin.mjs new file mode 100644 index 0000000..56c7492 --- /dev/null +++ b/src/CodeMarkerAnchorPlugin.mjs @@ -0,0 +1,49 @@ +/** + * [Expressive code plugin] for transforming [markers][] beginning with `#` + * into anchor links. The marker text is replaced with only `#` and + * the marker becomes a hyperlink to the anchor itself. + * + * This allows creating links to certain named positions within a code block. + * + * [markers]: https://expressive-code.com/key-features/text-markers/ + * [Expressive code plugin]: https://expressive-code.com/reference/plugin-hooks/ + */ +export function pluginCodeMarkerAnchors() { + /** @type {import('astro-expressive-code').ExpressiveCodePlugin} */ + const plugin = { + name: "CodeMarkerAnchorPlugin", + hooks: { + // first, markers beginning with `#` are identified. the anchor ID is + // stored, then the text is replaced with just `#`. + postprocessAnnotations: async (x) => { + for (const line of x.codeBlock.getLines()) { + for (const annot of line.getAnnotations()) { + if (annot.markerType !== "mark") continue; + if (annot.label?.startsWith("#")) { + // NOTE: this is kind of hacky, it's adding a new field into the + // TextMarkerAnnotation class. + annot.anchor = annot.label.replace("#", ""); + annot.label = "#"; + } + } + } + }, + + // secondly, rendered markers with recorded anchors are turned into + // hyperlinks with id attributes. + postprocessRenderedLine: async (x) => { + const annot = x.line.getAnnotations().find((x) => !!x.anchor); + if (!annot) return; + + const lineAst = x.renderData.lineAst; + + lineAst.tagName = "a"; + lineAst.properties.id = annot.anchor; + lineAst.properties.href = `#${annot.anchor}`; + lineAst.properties.style = `text-decoration-line: none;${lineAst.properties.style ?? ""}`; + }, + }, + }; + + return plugin; +} diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 8e81fd6..8a13a78 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -53,38 +53,22 @@ function* generateMarkdown(lines: string[]) { const bodyRegex = /^ (.*)/; const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; - let match; - for (const line of lines) { - if (line.match(usageRegex)) { - yield '```'; - yield line; - yield '```'; - - } else if (line.match(headingRegex)) { - yield "## " + escapeMarkdown(line.replace(/:$/, '')); - - } else if (match = line.match(optionRegex)) { - const option = escapeMarkdown(match[0]).trim(); - const longOption = line.replace(/-[^-],/, ''); - yield `### ${option}`; - yield ''; - yield '```bash'; - yield `lychee ${longOption.trimStart()}`; - yield '```'; - - } else if (match = line.match(bodyRegex)) { - const line = match[1]; - if (match = line.match(defaultValuesRegex)) { - yield `**${match[1]}**: ${match[2]}`; - yield ''; - } else { - yield ' ' + line; - } - - } else { - yield line; + const markers: {[l: string]: number} = {}; + for (const [i, line] of lines.entries()) { + const match = line.match(optionRegex); + if (match) { + markers[`#${match[0].trim()}`] = i + 1; } } + + for (const line of Object.keys(markers)) { + yield '### ' + line.replace('#', ''); + yield ''; + } + + yield '```text ' + Object.entries(markers).map(([l, i]) => JSON.stringify({[l]: i})).join(' ') + yield* lines; + yield '```'; } export async function generateCliOptionsMarkdown() { @@ -94,10 +78,10 @@ export async function generateCliOptionsMarkdown() { const rawUsageText = extractHelpFromReadme(await readme.text()); const usageText = [...generateMarkdown(splitLines(rawUsageText))].join("\n"); - assert(usageText.match('\n## Options\n'), 'options heading missing, check headingRegex'); - assert(usageText.match('\n### --dump\n'), '--dump heading missing, check optionRegex'); - assert(usageText.match('\n### --root-dir\n'), '--root-dir heading missing, check optionRegex'); - assert(usageText.match('\n Inputs for link checking'), 'expected body text missing, check bodyRegex'); + // assert(usageText.match('\n## Options\n'), 'options heading missing, check headingRegex'); + // assert(usageText.match('\n### --dump\n'), '--dump heading missing, check optionRegex'); + // assert(usageText.match('\n### --root-dir\n'), '--root-dir heading missing, check optionRegex'); + // assert(usageText.match('\n Inputs for link checking'), 'expected body text missing, check bodyRegex'); return usageText; } From 2b85c81f86ebae7bd12c39247fa3afdf8adcd092 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 8 Nov 2025 13:50:31 +1000 Subject: [PATCH 07/20] steal starlight's colours to make it grey ;-; --- astro.config.mjs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/astro.config.mjs b/astro.config.mjs index f6d39b7..f1a519f 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -4,6 +4,11 @@ import { generateCliOptionsIntegration } from "./src/fetchReadme"; import smartypants from "remark-smartypants"; import { pluginCodeMarkerAnchors } from "./src/CodeMarkerAnchorPlugin.mjs"; +// https://github.com/withastro/starlight/blob/main/packages/starlight/integrations/expressive-code/theming.ts +const isDark = false; +const neutralMinimal = isDark ? '#ffffff17' : '#0000001a'; +const neutralDimmed = isDark ? '#ffffff40' : '#00000055'; + // https://astro.build/config export default defineConfig({ site: "https://lychee.cli.rs", @@ -20,6 +25,12 @@ export default defineConfig({ expressiveCode: { plugins: [pluginCodeMarkerAnchors()], themes: ["catppuccin-frappe", "catppuccin-latte"], + styleOverrides: { + textMarkers: { + markBackground: neutralMinimal, + markBorderColor: neutralDimmed, + } + } }, title: "Docs", description: From 5bd52be667e5df47275c47e25ef860bd3dcc4faf Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 8 Nov 2025 13:51:19 +1000 Subject: [PATCH 08/20] Revert "steal starlight's colours to make it grey ;-;" This reverts commit 2b85c81f86ebae7bd12c39247fa3afdf8adcd092. --- astro.config.mjs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/astro.config.mjs b/astro.config.mjs index f1a519f..f6d39b7 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -4,11 +4,6 @@ import { generateCliOptionsIntegration } from "./src/fetchReadme"; import smartypants from "remark-smartypants"; import { pluginCodeMarkerAnchors } from "./src/CodeMarkerAnchorPlugin.mjs"; -// https://github.com/withastro/starlight/blob/main/packages/starlight/integrations/expressive-code/theming.ts -const isDark = false; -const neutralMinimal = isDark ? '#ffffff17' : '#0000001a'; -const neutralDimmed = isDark ? '#ffffff40' : '#00000055'; - // https://astro.build/config export default defineConfig({ site: "https://lychee.cli.rs", @@ -25,12 +20,6 @@ export default defineConfig({ expressiveCode: { plugins: [pluginCodeMarkerAnchors()], themes: ["catppuccin-frappe", "catppuccin-latte"], - styleOverrides: { - textMarkers: { - markBackground: neutralMinimal, - markBorderColor: neutralDimmed, - } - } }, title: "Docs", description: From 0975b2933f90e107a07b881999f6f47e9c384a66 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 8 Nov 2025 13:51:20 +1000 Subject: [PATCH 09/20] Revert "trying single code block highlight again. why is it blue???" This reverts commit 35353c7392d2a53dbd646b8ec1cfd9ac1b6f0c9e. --- astro.config.mjs | 2 -- src/CodeMarkerAnchorPlugin.mjs | 49 -------------------------------- src/fetchReadme.ts | 52 ++++++++++++++++++++++------------ 3 files changed, 34 insertions(+), 69 deletions(-) delete mode 100644 src/CodeMarkerAnchorPlugin.mjs diff --git a/astro.config.mjs b/astro.config.mjs index f6d39b7..adbf1b5 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -2,7 +2,6 @@ import starlight from "@astrojs/starlight"; import { defineConfig } from "astro/config"; import { generateCliOptionsIntegration } from "./src/fetchReadme"; import smartypants from "remark-smartypants"; -import { pluginCodeMarkerAnchors } from "./src/CodeMarkerAnchorPlugin.mjs"; // https://astro.build/config export default defineConfig({ @@ -18,7 +17,6 @@ export default defineConfig({ generateCliOptionsIntegration("src/content/docs/guides/_cli.md"), starlight({ expressiveCode: { - plugins: [pluginCodeMarkerAnchors()], themes: ["catppuccin-frappe", "catppuccin-latte"], }, title: "Docs", diff --git a/src/CodeMarkerAnchorPlugin.mjs b/src/CodeMarkerAnchorPlugin.mjs deleted file mode 100644 index 56c7492..0000000 --- a/src/CodeMarkerAnchorPlugin.mjs +++ /dev/null @@ -1,49 +0,0 @@ -/** - * [Expressive code plugin] for transforming [markers][] beginning with `#` - * into anchor links. The marker text is replaced with only `#` and - * the marker becomes a hyperlink to the anchor itself. - * - * This allows creating links to certain named positions within a code block. - * - * [markers]: https://expressive-code.com/key-features/text-markers/ - * [Expressive code plugin]: https://expressive-code.com/reference/plugin-hooks/ - */ -export function pluginCodeMarkerAnchors() { - /** @type {import('astro-expressive-code').ExpressiveCodePlugin} */ - const plugin = { - name: "CodeMarkerAnchorPlugin", - hooks: { - // first, markers beginning with `#` are identified. the anchor ID is - // stored, then the text is replaced with just `#`. - postprocessAnnotations: async (x) => { - for (const line of x.codeBlock.getLines()) { - for (const annot of line.getAnnotations()) { - if (annot.markerType !== "mark") continue; - if (annot.label?.startsWith("#")) { - // NOTE: this is kind of hacky, it's adding a new field into the - // TextMarkerAnnotation class. - annot.anchor = annot.label.replace("#", ""); - annot.label = "#"; - } - } - } - }, - - // secondly, rendered markers with recorded anchors are turned into - // hyperlinks with id attributes. - postprocessRenderedLine: async (x) => { - const annot = x.line.getAnnotations().find((x) => !!x.anchor); - if (!annot) return; - - const lineAst = x.renderData.lineAst; - - lineAst.tagName = "a"; - lineAst.properties.id = annot.anchor; - lineAst.properties.href = `#${annot.anchor}`; - lineAst.properties.style = `text-decoration-line: none;${lineAst.properties.style ?? ""}`; - }, - }, - }; - - return plugin; -} diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 8a13a78..8e81fd6 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -53,22 +53,38 @@ function* generateMarkdown(lines: string[]) { const bodyRegex = /^ (.*)/; const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; - const markers: {[l: string]: number} = {}; - for (const [i, line] of lines.entries()) { - const match = line.match(optionRegex); - if (match) { - markers[`#${match[0].trim()}`] = i + 1; - } - } + let match; + for (const line of lines) { + if (line.match(usageRegex)) { + yield '```'; + yield line; + yield '```'; + + } else if (line.match(headingRegex)) { + yield "## " + escapeMarkdown(line.replace(/:$/, '')); + + } else if (match = line.match(optionRegex)) { + const option = escapeMarkdown(match[0]).trim(); + const longOption = line.replace(/-[^-],/, ''); + yield `### ${option}`; + yield ''; + yield '```bash'; + yield `lychee ${longOption.trimStart()}`; + yield '```'; + + } else if (match = line.match(bodyRegex)) { + const line = match[1]; + if (match = line.match(defaultValuesRegex)) { + yield `**${match[1]}**: ${match[2]}`; + yield ''; + } else { + yield ' ' + line; + } - for (const line of Object.keys(markers)) { - yield '### ' + line.replace('#', ''); - yield ''; + } else { + yield line; + } } - - yield '```text ' + Object.entries(markers).map(([l, i]) => JSON.stringify({[l]: i})).join(' ') - yield* lines; - yield '```'; } export async function generateCliOptionsMarkdown() { @@ -78,10 +94,10 @@ export async function generateCliOptionsMarkdown() { const rawUsageText = extractHelpFromReadme(await readme.text()); const usageText = [...generateMarkdown(splitLines(rawUsageText))].join("\n"); - // assert(usageText.match('\n## Options\n'), 'options heading missing, check headingRegex'); - // assert(usageText.match('\n### --dump\n'), '--dump heading missing, check optionRegex'); - // assert(usageText.match('\n### --root-dir\n'), '--root-dir heading missing, check optionRegex'); - // assert(usageText.match('\n Inputs for link checking'), 'expected body text missing, check bodyRegex'); + assert(usageText.match('\n## Options\n'), 'options heading missing, check headingRegex'); + assert(usageText.match('\n### --dump\n'), '--dump heading missing, check optionRegex'); + assert(usageText.match('\n### --root-dir\n'), '--root-dir heading missing, check optionRegex'); + assert(usageText.match('\n Inputs for link checking'), 'expected body text missing, check bodyRegex'); return usageText; } From e6b31e956842b6e79b0851798dcf7f0d76cedfc1 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 00:13:39 +1000 Subject: [PATCH 10/20] biome --- astro.config.mjs | 6 +- src/components/code.astro | 3 + src/fetchReadme.ts | 232 ++++++++++++++++++++------------------ 3 files changed, 129 insertions(+), 112 deletions(-) diff --git a/astro.config.mjs b/astro.config.mjs index adbf1b5..dbe43f1 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -1,7 +1,7 @@ import starlight from "@astrojs/starlight"; import { defineConfig } from "astro/config"; -import { generateCliOptionsIntegration } from "./src/fetchReadme"; import smartypants from "remark-smartypants"; +import { generateCliOptionsIntegration } from "./src/fetchReadme"; // https://astro.build/config export default defineConfig({ @@ -10,8 +10,8 @@ export default defineConfig({ remarkPlugins: [ // automatically converting smart dashes causes problems with cli arguments. // to insert dashes, use unicode or — or –. - [smartypants, { dashes: false }] - ] + [smartypants, { dashes: false }], + ], }, integrations: [ generateCliOptionsIntegration("src/content/docs/guides/_cli.md"), diff --git a/src/components/code.astro b/src/components/code.astro index 0d309f9..585d065 100644 --- a/src/components/code.astro +++ b/src/components/code.astro @@ -1,4 +1,7 @@ --- + + + import fs from "node:fs/promises"; import { Code as SCode } from "@astrojs/starlight/components"; diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 8e81fd6..1d51f41 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -1,133 +1,147 @@ -import type { AstroIntegration } from 'astro'; -import assert from 'node:assert'; -import { readFileSync, realpathSync, rmSync, writeFileSync } from 'node:fs'; -import { basename, dirname, join } from 'node:path'; +import assert from "node:assert"; +import { readFileSync, realpathSync, rmSync, writeFileSync } from "node:fs"; +import { basename, dirname, join } from "node:path"; +import type { AstroIntegration } from "astro"; const VERSION = "lychee-v0.21.0"; // https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md const url = `https://raw.githubusercontent.com/lycheeverse/lychee/refs/tags/${VERSION}/README.md`; -const TEMPLATE = 'README-OPTIONS-PLACEHOLDER'; +const TEMPLATE = "README-OPTIONS-PLACEHOLDER"; function extractHelpFromReadme(readme: string) { - const [, section] = readme.split(/### Commandline Parameters/, 2); - if (!section) - throw new Error( - "LycheeCliOptions: commandline parameters section not found in readme", - ); - - const [, text] = section.split("\n```help-message\n", 2); - if (!text) - throw new Error( - "LycheeCliOptions: ```help-message marker not found in commandline parameters section", - ); - - const [helpText] = text.split("\n```\n", 2); - if (!helpText) - throw new Error( - "LycheeCliOptions: closing ``` marker not found after ```text", - ); - - return helpText; + const [, section] = readme.split(/### Commandline Parameters/, 2); + if (!section) + throw new Error( + "LycheeCliOptions: commandline parameters section not found in readme", + ); + + const [, text] = section.split("\n```help-message\n", 2); + if (!text) + throw new Error( + "LycheeCliOptions: ```help-message marker not found in commandline parameters section", + ); + + const [helpText] = text.split("\n```\n", 2); + if (!helpText) + throw new Error( + "LycheeCliOptions: closing ``` marker not found after ```text", + ); + + return helpText; } // https://stackoverflow.com/a/6234804 function escapeMarkdown(unsafe: string): string { - return unsafe - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) - .replace(/'/g, "'"); + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); } function splitLines(s: string): string[] { - return s.split(/\r?\n/g); + return s.split(/\r?\n/g); } +// biome-ignore-start lint/suspicious/noAssignInExpressions: using assignment expressions for regex match is conventional function* generateMarkdown(lines: string[]) { - const headingRegex = /^\w+:$/; - const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; - const usageRegex = /^Usage: /; - const bodyRegex = /^ (.*)/; - const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; - - let match; - for (const line of lines) { - if (line.match(usageRegex)) { - yield '```'; - yield line; - yield '```'; - - } else if (line.match(headingRegex)) { - yield "## " + escapeMarkdown(line.replace(/:$/, '')); - - } else if (match = line.match(optionRegex)) { - const option = escapeMarkdown(match[0]).trim(); - const longOption = line.replace(/-[^-],/, ''); - yield `### ${option}`; - yield ''; - yield '```bash'; - yield `lychee ${longOption.trimStart()}`; - yield '```'; - - } else if (match = line.match(bodyRegex)) { - const line = match[1]; - if (match = line.match(defaultValuesRegex)) { - yield `**${match[1]}**: ${match[2]}`; - yield ''; - } else { - yield ' ' + line; - } - - } else { - yield line; - } - } + const headingRegex = /^\w+:$/; + const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; + const usageRegex = /^Usage: /; + const bodyRegex = /^ {10}(.*)/; + const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; + + let match: RegExpMatchArray | null = null; + for (const line of lines) { + if (line.match(usageRegex)) { + yield "```"; + yield line; + yield "```"; + } else if (line.match(headingRegex)) { + yield `## ${escapeMarkdown(line.replace(/:$/, ""))}`; + } else if ((match = line.match(optionRegex))) { + const option = escapeMarkdown(match[0]).trim(); + const longOption = line.replace(/-[^-],/, ""); + yield `### ${option}`; + yield ""; + yield "```bash"; + yield `lychee ${longOption.trimStart()}`; + yield "```"; + } else if ((match = line.match(bodyRegex))) { + const line = match[1]; + if ((match = line.match(defaultValuesRegex))) { + yield `**${match[1]}**: ${match[2]}`; + yield ""; + } else { + yield ` ${line}`; + } + } else { + yield line; + } + } } +// biome-ignore-end lint/suspicious/noAssignInExpressions: using assignment expressions for regex match is conventional export async function generateCliOptionsMarkdown() { - const readme = await fetch(url); - assert(readme.ok, `${readme.status} when fetching ${url}`); - - const rawUsageText = extractHelpFromReadme(await readme.text()); - const usageText = [...generateMarkdown(splitLines(rawUsageText))].join("\n"); - - assert(usageText.match('\n## Options\n'), 'options heading missing, check headingRegex'); - assert(usageText.match('\n### --dump\n'), '--dump heading missing, check optionRegex'); - assert(usageText.match('\n### --root-dir\n'), '--root-dir heading missing, check optionRegex'); - assert(usageText.match('\n Inputs for link checking'), 'expected body text missing, check bodyRegex'); - - return usageText; + const readme = await fetch(url); + assert(readme.ok, `${readme.status} when fetching ${url}`); + + const rawUsageText = extractHelpFromReadme(await readme.text()); + const usageText = [...generateMarkdown(splitLines(rawUsageText))].join("\n"); + + assert( + usageText.match("\n## Options\n"), + "options heading missing, check headingRegex", + ); + assert( + usageText.match("\n### --dump\n"), + "--dump heading missing, check optionRegex", + ); + assert( + usageText.match("\n### --root-dir\n"), + "--root-dir heading missing, check optionRegex", + ); + assert( + usageText.match("\n Inputs for link checking"), + "expected body text missing, check bodyRegex", + ); + + return usageText; } - -export function generateCliOptionsIntegration(templatePath: string): AstroIntegration { - const [dir, file] = [dirname(templatePath), basename(templatePath)]; - - const outputPath = join(dir, file.replace('_', '')); - - return { - name: 'lycheeverse:generate-cli-page', - hooks: { - 'astro:config:setup': async ({ logger, addWatchFile }) => { - logger.info("Using template file " + templatePath); - - addWatchFile(realpathSync(templatePath)); - addWatchFile(import.meta.filename); - - logger.info("Fetching from git tag " + VERSION); - rmSync(outputPath, { force: true }); - const usageText = generateCliOptionsMarkdown(); - - const docTemplateText = readFileSync(templatePath, "utf-8"); - const docOutput = docTemplateText.replace(TEMPLATE, await usageText); - - assert(docOutput != docTemplateText, `Placeholder ${TEMPLATE} not found in template file`); - logger.info("Writing output file " + outputPath); - writeFileSync(outputPath, docOutput); - } - } - }; +export function generateCliOptionsIntegration( + templatePath: string, +): AstroIntegration { + const [dir, file] = [dirname(templatePath), basename(templatePath)]; + + const outputPath = join(dir, file.replace("_", "")); + + return { + name: "lycheeverse:generate-cli-page", + hooks: { + "astro:config:setup": async ({ logger, addWatchFile }) => { + logger.info(`Using template file ${templatePath}`); + + addWatchFile(realpathSync(templatePath)); + addWatchFile(import.meta.filename); + + logger.info(`Fetching from git tag ${VERSION}`); + rmSync(outputPath, { force: true }); + const usageText = generateCliOptionsMarkdown(); + + const docTemplateText = readFileSync(templatePath, "utf-8"); + const docOutput = docTemplateText.replace(TEMPLATE, await usageText); + + assert( + docOutput !== docTemplateText, + `Placeholder ${TEMPLATE} not found in template file`, + ); + logger.info(`Writing output file ${outputPath}`); + writeFileSync(outputPath, docOutput); + }, + }, + }; } From 0c90d66ddff9cad0715602ea7067fdfc4680dcd1 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 00:15:20 +1000 Subject: [PATCH 11/20] remove escapeMarkdown --- src/fetchReadme.ts | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 1d51f41..1f21977 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -32,23 +32,13 @@ function extractHelpFromReadme(readme: string) { return helpText; } -// https://stackoverflow.com/a/6234804 -function escapeMarkdown(unsafe: string): string { - return unsafe - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) - .replace(/'/g, "'"); -} - function splitLines(s: string): string[] { return s.split(/\r?\n/g); } // biome-ignore-start lint/suspicious/noAssignInExpressions: using assignment expressions for regex match is conventional function* generateMarkdown(lines: string[]) { - const headingRegex = /^\w+:$/; + const headingRegex = /^(\w+):$/; const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; const usageRegex = /^Usage: /; const bodyRegex = /^ {10}(.*)/; @@ -60,16 +50,17 @@ function* generateMarkdown(lines: string[]) { yield "```"; yield line; yield "```"; - } else if (line.match(headingRegex)) { - yield `## ${escapeMarkdown(line.replace(/:$/, ""))}`; + } else if ((match = line.match(headingRegex))) { + yield `## ${match[1]}`; } else if ((match = line.match(optionRegex))) { - const option = escapeMarkdown(match[0]).trim(); + const option = match[0].trim(); const longOption = line.replace(/-[^-],/, ""); yield `### ${option}`; yield ""; yield "```bash"; yield `lychee ${longOption.trimStart()}`; yield "```"; + yield ""; } else if ((match = line.match(bodyRegex))) { const line = match[1]; if ((match = line.match(defaultValuesRegex))) { From 0936b318a95b21226e65bb2edf16524d1466fa50 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 00:15:43 +1000 Subject: [PATCH 12/20] remove default value formatting --- src/fetchReadme.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 1f21977..00b3ac2 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -42,7 +42,6 @@ function* generateMarkdown(lines: string[]) { const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; const usageRegex = /^Usage: /; const bodyRegex = /^ {10}(.*)/; - const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; let match: RegExpMatchArray | null = null; for (const line of lines) { @@ -62,13 +61,7 @@ function* generateMarkdown(lines: string[]) { yield "```"; yield ""; } else if ((match = line.match(bodyRegex))) { - const line = match[1]; - if ((match = line.match(defaultValuesRegex))) { - yield `**${match[1]}**: ${match[2]}`; - yield ""; - } else { - yield ` ${line}`; - } + yield ` ${match[1]}`; } else { yield line; } From 35bde021d81650528fa0af2c7a7a0fe4635870f2 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 00:21:24 +1000 Subject: [PATCH 13/20] try add remark-smartypants to package.json --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 623c946..1db09a2 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "@astrojs/check": "^0.9.5", "@astrojs/starlight": "^0.36.2", "astro": "^5.15.3", + "remark-smartypants": "^3.0.2", "sharp": "^0.34.4", "typescript": "^5.9.3" }, From 96550cf9f017cb14cac7485da6a0ba6bf20cc0ea Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 00:35:51 +1000 Subject: [PATCH 14/20] Revert "remove default value formatting" This reverts commit 0936b318a95b21226e65bb2edf16524d1466fa50. --- src/fetchReadme.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 00b3ac2..1f21977 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -42,6 +42,7 @@ function* generateMarkdown(lines: string[]) { const optionRegex = /^[- ,a-zA-Z]{2,6}(--|\[)([a-z-.\]]+)/; const usageRegex = /^Usage: /; const bodyRegex = /^ {10}(.*)/; + const defaultValuesRegex = /^\[(default|possible values|env): (.*)\]$/; let match: RegExpMatchArray | null = null; for (const line of lines) { @@ -61,7 +62,13 @@ function* generateMarkdown(lines: string[]) { yield "```"; yield ""; } else if ((match = line.match(bodyRegex))) { - yield ` ${match[1]}`; + const line = match[1]; + if ((match = line.match(defaultValuesRegex))) { + yield `**${match[1]}**: ${match[2]}`; + yield ""; + } else { + yield ` ${line}`; + } } else { yield line; } From de6c2a849fb0f3fd9282e4b0717cdf92d55e40f6 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 00:42:50 +1000 Subject: [PATCH 15/20] format markdown as rich text, rather than code. i think, that this is generally nicer to read. However, this means that the website text is rendered from the same text as the CLI --help. The website understands Markdown, but the CLI does not (obviously) so we have to be careful. I would suggest that when writing help, the focus should be on readability within the *console*. This means that while Markdown can be used, it should be limited to syntax which is unobtrusive in raw text. Raw text is still the main format which the help text will be rendered in. Personally, these would be okay: - single `*` or `_` for emphasis (with preference for `*`) - single backticks for inline code - four space indentation for code blocks - bullet and numbered lists Imo, these would *not* be okay, because they appear too jarring in plain text: - code blocks with triple backtick fences. this includes any astro-specific asides and the like. - link syntax with `[link](https://url)` - bold when used for subheadings like `**Note**:` I think this is a good compromise which lets the same text be usable for both CLI --help and the website's rich text HTML. --- src/fetchReadme.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/fetchReadme.ts b/src/fetchReadme.ts index 1f21977..e92ee0f 100644 --- a/src/fetchReadme.ts +++ b/src/fetchReadme.ts @@ -47,8 +47,9 @@ function* generateMarkdown(lines: string[]) { let match: RegExpMatchArray | null = null; for (const line of lines) { if (line.match(usageRegex)) { - yield "```"; - yield line; + yield "Usage:"; + yield "```bash"; + yield line.replace(/^Usage: /, ""); yield "```"; } else if ((match = line.match(headingRegex))) { yield `## ${match[1]}`; @@ -67,7 +68,7 @@ function* generateMarkdown(lines: string[]) { yield `**${match[1]}**: ${match[2]}`; yield ""; } else { - yield ` ${line}`; + yield line; } } else { yield line; @@ -96,7 +97,7 @@ export async function generateCliOptionsMarkdown() { "--root-dir heading missing, check optionRegex", ); assert( - usageText.match("\n Inputs for link checking"), + usageText.match("\nInputs for link checking"), "expected body text missing, check bodyRegex", ); From fb71736d31942e005e1906596898efc9d73afd7a Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 17:33:58 +1000 Subject: [PATCH 16/20] pnpm lock --- pnpm-lock.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 24e705b..4737129 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -17,6 +17,9 @@ importers: astro: specifier: ^5.15.3 version: 5.15.3(@types/node@24.10.0)(rollup@4.52.5)(typescript@5.9.3)(yaml@2.8.1) + remark-smartypants: + specifier: ^3.0.2 + version: 3.0.2 sharp: specifier: ^0.34.4 version: 0.34.4 From 70d233c4706b22cf0d36af1c44ffe439262867cb Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 9 Nov 2025 17:37:52 +1000 Subject: [PATCH 17/20] reorder sidebar based on (subjective) frequency of use. also tweak page titles to be in Title Case. --- astro.config.mjs | 4 ++-- src/content/docs/guides/_cli.md | 2 +- src/content/docs/guides/config.md | 2 +- src/content/docs/guides/preprocessing.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/astro.config.mjs b/astro.config.mjs index dbe43f1..32164ee 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -48,11 +48,11 @@ export default defineConfig({ label: "Guides", items: [ "guides/getting-started", - "guides/library", - "guides/config", "guides/cli", + "guides/config", "guides/output", "guides/preprocessing", + "guides/library", ], }, { diff --git a/src/content/docs/guides/_cli.md b/src/content/docs/guides/_cli.md index 399bfa8..a0f2db5 100644 --- a/src/content/docs/guides/_cli.md +++ b/src/content/docs/guides/_cli.md @@ -1,5 +1,5 @@ --- -title: CLI +title: Command-Line Flags ---