diff --git a/Dockerfile b/Dockerfile index 4f29535..d278679 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,6 @@ RUN apt-get -y update && \ WORKDIR /home/node/app USER node COPY --chown=node:node . . -# This also installs hsts and tld data files in a postinstall script: RUN npm install ARG GIT_SHA=dev @@ -18,6 +17,5 @@ RUN env ENV RUN_ID=${RUN_ID} ENV GIT_SHA=${GIT_SHA} -ENV NODE_EXTRA_CA_CERTS=node_modules/extra_certs/ca_bundle/ca_intermediate_bundle.pem EXPOSE 8080 CMD [ "node", "src/api/index.js" ] diff --git a/bin/wrapper.js b/bin/wrapper.js index 37fd04c..5128fb7 100755 --- a/bin/wrapper.js +++ b/bin/wrapper.js @@ -9,13 +9,6 @@ import { fileURLToPath } from "node:url"; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); -// Set the environment variable for extra CA certificates -let caCertPath = import.meta.resolve("node_extra_ca_certs_mozilla_bundle"); -caCertPath = new URL(caCertPath).pathname; -caCertPath = path.dirname(caCertPath); -caCertPath = path.join(caCertPath, "ca_bundle", "ca_intermediate_bundle.pem"); -process.env.NODE_EXTRA_CA_CERTS = caCertPath; - // The target script you want to run (relative to this script's directory) const targetScript = path.join(__dirname, "..", "src", "scan.js"); diff --git a/package-lock.json b/package-lock.json index a8c822b..8dca9e0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,7 +7,6 @@ "": { "name": "@mdn/mdn-http-observatory", "version": "1.5.0", - "hasInstallScript": true, "license": "MPL-2.0", "dependencies": { "@fastify/cors": "^11.0.1", @@ -26,7 +25,7 @@ "http-cookie-agent": "^7.0.1", "ip": "^2.0.1", "jsdom": "^27.0.0", - "node_extra_ca_certs_mozilla_bundle": "^1.0.7", + "papaparse": "^5.5.3", "pg": "^8.16.2", "pg-format": "^1.0.4", "pg-native": "^3.5.2", @@ -2075,12 +2074,6 @@ "file-uri-to-path": "1.0.0" } }, - "node_modules/bluebird": { - "version": "3.7.2", - "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz", - "integrity": 
"sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==", - "license": "MIT" - }, "node_modules/brace-expansion": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", @@ -2460,24 +2453,6 @@ "node": ">=18" } }, - "node_modules/cross-env": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-env/-/cross-env-7.0.3.tgz", - "integrity": "sha512-+/HKd6EgcQCJGh2PSjZuUitQBQynKor4wrFbRg4DtAgS1aWO+gU52xpH7M9ScGgXSYmAVS9bIJ8EzuaGw0oNAw==", - "license": "MIT", - "dependencies": { - "cross-spawn": "^7.0.1" - }, - "bin": { - "cross-env": "src/bin/cross-env.js", - "cross-env-shell": "src/bin/cross-env-shell.js" - }, - "engines": { - "node": ">=10.14", - "npm": ">=6", - "yarn": ">=1" - } - }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -2519,23 +2494,6 @@ "node": ">=20" } }, - "node_modules/csvtojson": { - "version": "2.0.10", - "resolved": "https://registry.npmjs.org/csvtojson/-/csvtojson-2.0.10.tgz", - "integrity": "sha512-lUWFxGKyhraKCW8Qghz6Z0f2l/PqB1W3AO0HKJzGIQ5JRSlR651ekJDiGJbBT4sRNNv5ddnSGVEnsxP9XRCVpQ==", - "license": "MIT", - "dependencies": { - "bluebird": "^3.5.1", - "lodash": "^4.17.3", - "strip-bom": "^2.0.0" - }, - "bin": { - "csvtojson": "bin/csvtojson" - }, - "engines": { - "node": ">=4.0.0" - } - }, "node_modules/data-urls": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-6.0.0.tgz", @@ -3930,12 +3888,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/is-utf8": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-utf8/-/is-utf8-0.2.1.tgz", - "integrity": "sha512-rMYPYvCzsXywIsldgLaSoPlw5PfoB/ssr7hY4pLfcodrA5M/eArza1a9VmTiNIBNMjOGr1Ow9mTyU2o69U6U9Q==", - "license": "MIT" - }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ 
-4553,18 +4505,6 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, - "node_modules/node_extra_ca_certs_mozilla_bundle": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/node_extra_ca_certs_mozilla_bundle/-/node_extra_ca_certs_mozilla_bundle-1.0.7.tgz", - "integrity": "sha512-wgnipQ71j14/5M//dp0kU8IzUYARoSaRpG0ILtLTa6QHB8EEHfN5OzSmViYxwSom8GTlC6KQC3GT2xs7DCUlRw==", - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "axios": "^1.6.5", - "cross-env": "^7.0.3", - "csvtojson": "^2.0.10" - } - }, "node_modules/nodemon": { "version": "3.1.10", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-3.1.10.tgz", @@ -4729,6 +4669,12 @@ "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", "license": "BlueOak-1.0.0" }, + "node_modules/papaparse": { + "version": "5.5.3", + "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.5.3.tgz", + "integrity": "sha512-5QvjGxYVjxO59MGU2lHVYpRWBBtKHnlIAcSe1uNFCkkptUh63NFRj0FJQm7nR67puEruUci/ZkjmEFrjCAyP4A==", + "license": "MIT" + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -5989,18 +5935,6 @@ "node": ">=8" } }, - "node_modules/strip-bom": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-2.0.0.tgz", - "integrity": "sha512-kwrX1y7czp1E69n2ajbG65mIo9dqvJ+8aBQXOGVxqwvNbsXdFM6Lq37dLAY3mknUwru8CfcCbfOLL/gMo+fi3g==", - "license": "MIT", - "dependencies": { - "is-utf8": "^0.2.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", diff --git a/package.json b/package.json index 77cf737..c4964e4 100644 --- a/package.json +++ b/package.json @@ -9,16 +9,14 @@ "npm": ">=9.0.0" }, 
"scripts": { - "start": "NODE_EXTRA_CA_CERTS=node_modules/node_extra_ca_certs_mozilla_bundle/ca_bundle/ca_intermediate_root_bundle.pem node src/api/index.js", - "dev": "NODE_EXTRA_CA_CERTS=node_modules/node_extra_ca_certs_mozilla_bundle/ca_bundle/ca_intermediate_root_bundle.pem nodemon src/api/index.js", + "start": "node src/api/index.js", + "dev": "nodemon src/api/index.js", "test": "CONFIG_FILE=conf/config-test.json mocha", "tsc": "tsc -p jsconfig.json", - "updateHsts": "node src/retrieve-hsts.js", - "updateTldList": "node src/retrieve-tld-list.js", "refreshMaterializedViews": "node src/maintenance/index.js", + "refreshCache": "node src/cache.js", "maintenance": "node src/maintenance/index.js", - "migrate": "node -e 'import(\"./src/database/migrate.js\").then( m => m.migrateDatabase() )'", - "postinstall": "npm run updateHsts && npm run updateTldList" + "migrate": "node -e 'import(\"./src/database/migrate.js\").then( m => m.migrateDatabase() )'" }, "bin": { "mdn-http-observatory-scan": "bin/wrapper.js" @@ -59,7 +57,7 @@ "http-cookie-agent": "^7.0.1", "ip": "^2.0.1", "jsdom": "^27.0.0", - "node_extra_ca_certs_mozilla_bundle": "^1.0.7", + "papaparse": "^5.5.3", "pg": "^8.16.2", "pg-format": "^1.0.4", "pg-native": "^3.5.2", diff --git a/src/analyzer/hsts.js b/src/analyzer/hsts.js index 464a317..87875a1 100644 --- a/src/analyzer/hsts.js +++ b/src/analyzer/hsts.js @@ -1,9 +1,6 @@ import fs from "fs"; -import path from "node:path"; -import { fileURLToPath } from "node:url"; import { Site } from "../site.js"; - -const dirname = path.dirname(fileURLToPath(import.meta.url)); +import { HSTS_PRELOAD_PATH } from "../cache.js"; /** * @type {import("../types.js").Hsts | null} @@ -15,15 +12,8 @@ let hstsMap = null; */ export function hsts() { if (!hstsMap) { - const filePath = path.join( - dirname, - "..", - "..", - "conf", - "hsts-preload.json" - ); hstsMap = new Map( - Object.entries(JSON.parse(fs.readFileSync(filePath, "utf8"))) + 
Object.entries(JSON.parse(fs.readFileSync(HSTS_PRELOAD_PATH, "utf8"))) ); } return hstsMap; diff --git a/src/api/index.js b/src/api/index.js index 79191e7..17eebda 100644 --- a/src/api/index.js +++ b/src/api/index.js @@ -1,7 +1,10 @@ import { CONFIG } from "../config.js"; import { createServer } from "./server.js"; +import { setupCache } from "../cache.js"; async function main() { + await setupCache(); + const server = await createServer(); try { await server.listen({ diff --git a/src/api/v2/utils.js b/src/api/v2/utils.js index 171ac0d..ef5d0cf 100644 --- a/src/api/v2/utils.js +++ b/src/api/v2/utils.js @@ -1,8 +1,6 @@ import ip from "ip"; import dns from "node:dns"; import fs from "fs"; -import { fileURLToPath } from "node:url"; -import path from "node:path"; import { InvalidHostNameError, InvalidHostNameIpError, @@ -28,6 +26,7 @@ import { PolicyResponse } from "./schemas.js"; import { Expectation } from "../../types.js"; import { TEST_TITLES } from "../../grader/charts.js"; import { scan } from "../../scanner/index.js"; +import { TLD_LIST_PATH } from "../../cache.js"; /** * @@ -50,7 +49,6 @@ export function isIp(hostname) { * @type {Set | null} */ let tldSet = null; -const dirname = path.dirname(fileURLToPath(import.meta.url)); /** * Get the cached set of top level domains. 
@@ -58,15 +56,7 @@ const dirname = path.dirname(fileURLToPath(import.meta.url)); */ function tlds() { if (!tldSet) { - const filePath = path.join( - dirname, - "..", - "..", - "..", - "conf", - "tld-list.json" - ); - tldSet = new Set(JSON.parse(fs.readFileSync(filePath, "utf8"))); + tldSet = new Set(JSON.parse(fs.readFileSync(TLD_LIST_PATH, "utf8"))); } return tldSet; } diff --git a/src/ca-bundle.js b/src/ca-bundle.js new file mode 100644 index 0000000..670429a --- /dev/null +++ b/src/ca-bundle.js @@ -0,0 +1,72 @@ +import axios from "axios"; +import { writeFile } from "fs/promises"; +import Papa from "papaparse"; + +const INTERMEDIATE_CA_URL = + "https://ccadb.my.salesforce-sites.com/mozilla/PublicAllIntermediateCertsWithPEMCSV"; + +const ROOT_CA_URL = + "https://ccadb.my.salesforce-sites.com/mozilla/IncludedCACertificateReportPEMCSV"; + +/** + * @param {string} url + * @returns {Promise} + */ +async function downloadCertificates(url) { + let r; + try { + r = await axios.get(url); + } catch (error) { + throw Error(`Failed to get data: ${error}`); + } + + const data = Papa.parse(r.data, { header: true }).data; + const output = []; + for (const entry of data) { + // Remove quotes from beginning and end of certificate + const certPem = entry["PEM Info"].slice(1, -1); + const commonName = entry["Common Name or Certificate Name"]; + output.push(`${commonName}\n${certPem}`); + } + return output; +} + +/** + * @returns {Promise} + */ +async function retrieveCABundle() { + // Download at the same time + const values = await Promise.all([ + downloadCertificates(INTERMEDIATE_CA_URL), + downloadCertificates(ROOT_CA_URL), + ]); + + const intermediateCACerts = values[0]; + const rootCACerts = values[1]; + + const combinedCACerts = intermediateCACerts.concat(rootCACerts); + return combinedCACerts.join("\n\n"); +} + +/** + * @param {string} filePath + */ +export async function retrieveAndStoreCABundle(filePath) { + const caBundle = await retrieveCABundle(); + + try { + await 
writeFile(filePath, caBundle); + console.log(`Downloaded Mozilla CA bundle and saved it to ${filePath}`); + } catch (error) { + console.error("Error writing file:", error); + return; + } +} + +/** + * + * @param {string} filePath + */ +export async function setupCABundle(filePath) { + process.env.NODE_EXTRA_CA_CERTS = filePath; +} diff --git a/src/cache.js b/src/cache.js new file mode 100644 index 0000000..04b1a31 --- /dev/null +++ b/src/cache.js @@ -0,0 +1,67 @@ +import path from "node:path"; +import os from "node:os"; +import fs from "node:fs"; + +import { retrieveAndStoreCABundle } from "./ca-bundle.js"; +import { retrieveAndStoreHsts } from "./hsts.js"; +import { retrieveAndStoreTldList } from "./tld-list.js"; + +const CACHE_DIR = path.join(os.homedir(), ".cache", "mdn-http-observatory"); +export const CA_BUNDLE_PATH = path.join(CACHE_DIR, "mozilla.ca-bundle"); +export const HSTS_PRELOAD_PATH = path.join(CACHE_DIR, "hsts-preload.json"); +export const TLD_LIST_PATH = path.join(CACHE_DIR, "tld-list.json"); + +/** + * Setup the cache. + * + * Create `~/.cache/mdn-http-observatory` if it doesn't exist. + * Only download files if they don't exist in the cache directory. + */ +export async function setupCache() { + setupCacheDirectory(); + + const promises = []; + if (!fs.existsSync(CA_BUNDLE_PATH)) { + promises.push(retrieveAndStoreCABundle(CA_BUNDLE_PATH)); + } + if (!fs.existsSync(HSTS_PRELOAD_PATH)) { + promises.push(retrieveAndStoreHsts(HSTS_PRELOAD_PATH)); + } + if (!fs.existsSync(TLD_LIST_PATH)) { + promises.push(retrieveAndStoreTldList(TLD_LIST_PATH)); + } + + // Download at the same time + await Promise.all(promises); +} + +/** + * Forcibly refresh cache. + * + * Downloading all files even if they are already present in the cache + * directory. 
+ */ +export async function refreshCache() { + setupCacheDirectory(); + + await Promise.all([ + retrieveAndStoreCABundle(CA_BUNDLE_PATH), + retrieveAndStoreHsts(HSTS_PRELOAD_PATH), + retrieveAndStoreTldList(TLD_LIST_PATH), + ]); +} + +function setupCacheDirectory() { + try { + if (!fs.existsSync(CACHE_DIR)) { + fs.mkdirSync(CACHE_DIR); + } + } catch (err) { + console.error(err); + } +} + +// Refresh cache when this file is run directly. +if (import.meta.url === `file://${process.argv[1]}`) { + refreshCache().catch(console.error); +} diff --git a/src/retrieve-hsts.js b/src/hsts.js similarity index 80% rename from src/retrieve-hsts.js rename to src/hsts.js index d181893..5cdb3d8 100644 --- a/src/retrieve-hsts.js +++ b/src/hsts.js @@ -1,7 +1,5 @@ import axios from "axios"; import { writeFile } from "fs/promises"; -import path from "node:path"; -import { fileURLToPath } from "node:url"; const HSTS_URL = new URL( "https://raw.githubusercontent.com/chromium/chromium/main/net/http/transport_security_state_static.json" @@ -16,8 +14,6 @@ const SCANNER_PINNED_DOMAINS = [ "services.mozilla.com", ]; -const dirname = path.dirname(fileURLToPath(import.meta.url)); - /** * * @typedef {Object} RawData @@ -38,9 +34,10 @@ const dirname = path.dirname(fileURLToPath(import.meta.url)); /** * Download the Google HSTS preload list + * @param {string} filePath * @returns */ -export async function retrieveAndStoreHsts() { +export async function retrieveAndStoreHsts(filePath) { let r; try { r = await axios.get(HSTS_URL.href); @@ -63,12 +60,11 @@ export async function retrieveAndStoreHsts() { pinned: SCANNER_PINNED_DOMAINS.includes(domain), }; return acc; - }, /** @type {HstsMap} */ ({})); + }, /** @type {HstsMap} */({})); - const filePath = path.join(dirname, "..", "conf", "hsts-preload.json"); try { await writeFile(filePath, JSON.stringify(hstsMap, null, 2)); - console.log(`File written to ${filePath}`); + console.log(`Downloaded HSTS data and saved it to ${filePath}`); } catch (error) { 
console.error("Error writing file:", error); return; @@ -83,8 +79,3 @@ export async function retrieveAndStoreHsts() { function removeJsonComments(jsonString) { return jsonString.replace(/\/\/.*$/gm, ""); } - -// Execute when run directly -if (import.meta.url === `file://${process.argv[1]}`) { - retrieveAndStoreHsts().catch(console.error); -} diff --git a/src/maintenance/index.js b/src/maintenance/index.js index 661ac7b..4522d05 100644 --- a/src/maintenance/index.js +++ b/src/maintenance/index.js @@ -2,16 +2,13 @@ import { createPool, refreshMaterializedViews, } from "../database/repository.js"; +import { refreshCache } from "../cache.js"; console.log("Starting MV refresh."); const pool = createPool(); await refreshMaterializedViews(pool); console.log("Successfully refreshed materialized views."); -import { retrieveAndStoreTldList } from "../retrieve-tld-list.js"; -await retrieveAndStoreTldList(); -console.log("Successfully updated TLD list."); - -import { retrieveAndStoreHsts } from "../retrieve-hsts.js"; -await retrieveAndStoreHsts(); -console.log("Successfully updated HSTS data."); +console.log("Starting cache refresh."); +await refreshCache(); +console.log("Successfully refreshed cache."); diff --git a/src/scan.js b/src/scan.js index e4b307d..7408850 100755 --- a/src/scan.js +++ b/src/scan.js @@ -3,6 +3,7 @@ import { Command } from "commander"; import { scan } from "./scanner/index.js"; import { Site } from "./site.js"; +import { setupCache } from "./cache.js"; const NAME = "mdn-http-observatory-scan"; const program = new Command(); @@ -14,6 +15,8 @@ program .argument("", "hostname to scan") .action(async (siteString, _options) => { try { + await setupCache(); + const site = Site.fromSiteString(siteString); const result = await scan(site); const tests = Object.fromEntries( diff --git a/src/retrieve-tld-list.js b/src/tld-list.js similarity index 65% rename from src/retrieve-tld-list.js rename to src/tld-list.js index ae76f96..2db56eb 100644 --- 
a/src/retrieve-tld-list.js +++ b/src/tld-list.js @@ -1,18 +1,15 @@ import axios from "axios"; import { writeFile } from "fs/promises"; -import path from "node:path"; -import { fileURLToPath } from "node:url"; const TLD_LIST_URL = new URL( "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" ); -const dirname = path.dirname(fileURLToPath(import.meta.url)); - /** * Download the IANA-maintained public suffix list + * @param {string} filePath */ -export async function retrieveAndStoreTldList() { +export async function retrieveAndStoreTldList(filePath) { let r; try { r = await axios.get(TLD_LIST_URL.href); @@ -21,10 +18,9 @@ export async function retrieveAndStoreTldList() { return; } const data = cleanData(r.data); - const filePath = path.join(dirname, "..", "conf", "tld-list.json"); try { await writeFile(filePath, data); - console.log(`File written to ${filePath}`); + console.log(`Downloaded TLD list and saved it to ${filePath}`); } catch (error) { console.error("Error writing file:", error); return; @@ -45,8 +41,3 @@ function cleanData(data) { .map((line) => line.trim().toLowerCase()); return JSON.stringify(ret); } - -// Execute when run directly -if (import.meta.url === `file://${process.argv[1]}`) { - retrieveAndStoreTldList().catch(console.error); -}