@@ -149,41 +149,75 @@ async function checkLinks() {
149149 }
150150
/**
 * Fetches `externalPageLink` and collects the data needed to validate links
 * that point at it.
 *
 * Responses with status 403, 429, 503 or 508 and thrown fetch/parse errors
 * are retried with exponential backoff (15s, 30s, 60s). Any other response,
 * including other non-2xx statuses, is returned immediately without a retry.
 *
 * This function never rejects: every failure is reported through
 * `errorMessage` so one broken link cannot abort the whole run.
 *
 * @returns {Promise<
 *   { ok: boolean, status: number, ids: string[], reactPartial: * } |
 *   { errorMessage: string }
 * >} Page data on a usable response, otherwise a description of the failure.
 */
const getPageData = async () => {
  // Number of retries performed after the initial attempt.
  const maxRetries = 3;
  // Initial backoff in seconds; doubled after every failed attempt.
  const baseDelay = 15;
  // Statuses observed when a server is throttling or temporarily refusing us.
  const retryableStatuses = [403, 429, 503, 508];

  // Single place that builds the failure result, so every exit path reports
  // the broken link the same way.
  const toBrokenLinkResult = (error) => ({
    errorMessage: `Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}\n ${error.stack}`,
  });

  // A malformed link can never succeed, so report it right away instead of
  // letting the URL constructor reject this promise or burning retries on it.
  let domain;
  try {
    domain = new URL(externalPageLink).hostname;
  } catch (error) {
    return toBrokenLinkResult(error);
  }

  // The loop only ends through a `return`: either a usable response or the
  // failure result once the retries are exhausted.
  for (let retryCount = 0; ; retryCount++) {
    try {
      const response = await nFetch(externalPageLink, {
        headers: {
          // Spoof a normal looking User-Agent to keep the servers happy
          // See https://github.com/JustinBeckwith/linkinator/blob/main/src/index.ts
          //
          // To better future-proof against the ua string being
          // responsible for any breakage, pull common, up-to-date strings
          // from a reliable source.
          // https://github.com/w3c/aria-practices/issues/3270
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/605.1.1',
        },
      });

      // Handle rate limit-related statuses by routing them through the same
      // retry path as network errors.
      if (retryableStatuses.includes(response.status)) {
        throw new Error(
          response.status === 429
            ? `Rate limited by ${domain}`
            : `Unsuccessful response from ${domain} (${response.status})`
        );
      }

      const text = await response.text();
      const html = HTMLParser.parse(text);
      const ids = html
        .querySelectorAll('[id]')
        .map((idElement) => idElement.getAttribute('id'));

      // Handle GitHub README links.
      // These links are stored within a react-partial element
      const reactPartial = getReactPartial(hrefOrSrc, html);
      return {
        ok: response.ok,
        status: response.status,
        ids,
        reactPartial,
      };
    } catch (error) {
      // Out of retries: report the failure instead of falling out of the
      // loop, which would hand `undefined` to the caller.
      if (retryCount >= maxRetries) {
        return toBrokenLinkResult(error);
      }

      // Found the retry-after unit returned from response headers too
      // variable to use here, but ~15 seconds seems like a safe
      // initial default
      const delay = baseDelay * 1000 * 2 ** retryCount;
      console.info(
        `Error fetching ${externalPageLink}: ${error.message}, retrying in ${delay}ms`
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
};
189223
@@ -213,17 +247,6 @@ async function checkLinks() {
213247 Object . entries ( externalPageLoaders ) . map (
214248 async ( [ externalPageLink , getPageData ] ) => {
215249 let pageData = await getPageData ( ) ;
216- if ( pageData . errorMessage ) {
217- console . info ( 'Retrying once' ) ;
218- pageData = await getPageData ( ) ;
219- }
220- if ( pageData . errorMessage ) {
221- await new Promise ( ( resolve ) => {
222- setTimeout ( resolve , 2000 ) ;
223- } ) ;
224- console . info ( 'Retrying twice' ) ;
225- pageData = await getPageData ( ) ;
226- }
227250 externalPageData [ externalPageLink ] = pageData ;
228251 loadedCount += 1 ;
229252 }
0 commit comments