@@ -323,12 +323,23 @@ export default class PageContext extends ContentFeature {
323323 // Used to avoid large content serialization
324324 const upperLimit = this . getFeatureSetting ( 'upperLimit' ) || 500000 ;
325325 let excludeSelectors = this . getFeatureSetting ( 'excludeSelectors' ) || [ '.ad' , '.sidebar' , '.footer' , '.nav' , '.header' ] ;
326- excludeSelectors = excludeSelectors . concat ( [ 'script' , 'style' , 'link' , 'meta' , 'noscript' , 'svg' , 'canvas' ] ) ;
326+ const excludedInertElements = this . getFeatureSetting ( 'excludedInertElements' ) || [
327+ 'script' ,
328+ 'style' ,
329+ 'link' ,
330+ 'meta' ,
331+ 'noscript' ,
332+ 'svg' ,
333+ 'canvas' ,
334+ ] ;
335+ excludeSelectors = excludeSelectors . concat ( excludedInertElements ) ;
327336
328337 let content = '' ;
329338 // Get content from main content areas
330- let mainContent = document . querySelector ( 'main, article, .content, .main, #content, #main' ) ;
331- if ( mainContent && mainContent . innerHTML . trim ( ) . length <= 100 ) {
339+ const mainContentSelector = this . getFeatureSetting ( 'mainContentSelector' ) || 'main, article, .content, .main, #content, #main' ;
340+ let mainContent = document . querySelector ( mainContentSelector ) ;
341+ const mainContentLength = this . getFeatureSetting ( 'mainContentLength' ) || 100 ;
342+ if ( mainContent && mainContent . innerHTML . trim ( ) . length <= mainContentLength ) {
332343 mainContent = null ;
333344 }
334345 const contentRoot = mainContent || document . body ;
@@ -364,7 +375,8 @@ export default class PageContext extends ContentFeature {
364375
365376 getHeadings ( ) {
366377 const headings = [ ] ;
367- const headingElements = document . querySelectorAll ( 'h1, h2, h3, h4, h5, h6' ) ;
378+ const headdingSelector = this . getFeatureSetting ( 'headingSelector' ) || 'h1, h2, h3, h4, h5, h6' ;
379+ const headingElements = document . querySelectorAll ( headdingSelector ) ;
368380
369381 headingElements . forEach ( ( heading ) => {
370382 const level = parseInt ( heading . tagName . charAt ( 1 ) ) ;
@@ -379,7 +391,8 @@ export default class PageContext extends ContentFeature {
379391
380392 getLinks ( ) {
381393 const links = [ ] ;
382- const linkElements = document . querySelectorAll ( 'a[href]' ) ;
394+ const linkSelector = this . getFeatureSetting ( 'linkSelector' ) || 'a[href]' ;
395+ const linkElements = document . querySelectorAll ( linkSelector ) ;
383396
384397 linkElements . forEach ( ( link ) => {
385398 const text = link . textContent ?. trim ( ) ;
@@ -394,7 +407,8 @@ export default class PageContext extends ContentFeature {
394407
395408 getImages ( ) {
396409 const images = [ ] ;
397- const imgElements = document . querySelectorAll ( 'img' ) ;
410+ const imgSelector = this . getFeatureSetting ( 'imgSelector' ) || 'img' ;
411+ const imgElements = document . querySelectorAll ( imgSelector ) ;
398412
399413 imgElements . forEach ( ( img ) => {
400414 const alt = img . getAttribute ( 'alt' ) || '' ;
0 commit comments