@@ -3,15 +3,48 @@ import { getFaviconList } from './favicon.js';
33import { isDuckAi , isBeingFramed , getTabUrl } from '../utils.js' ;
44const MSG_PAGE_CONTEXT_RESPONSE = 'collectionResult' ;
55
/**
 * Check whether a node's own computed style leaves it visible.
 *
 * Only the node itself is inspected via `window.getComputedStyle`; this
 * function does not walk up to ancestors.
 *
 * @param {Element} node - Node to inspect.
 * @returns {boolean} `false` when `display` is `none`, `visibility` is
 *   `hidden`, or `opacity` parses to `0`; also `false` when the computed
 *   style cannot be read. `true` otherwise.
 */
function checkNodeIsVisible(node) {
    try {
        const style = window.getComputedStyle(node);

        // Check primary visibility properties
        if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) === 0) {
            return false;
        }
        return true;
    } catch (e) {
        // Best effort: a node whose style cannot be computed is treated as not visible.
        return false;
    }
}
19+
/**
 * Replace every run of whitespace in a string with a single space.
 *
 * Leading and trailing whitespace is collapsed to one space, not trimmed.
 *
 * @param {unknown} str - Value to normalise.
 * @returns {string} The collapsed string, or `''` when `str` is not a string.
 */
function collapseWhitespace(str) {
    return typeof str === 'string' ? str.replace(/\s+/g, ' ') : '';
}
923
10- function domToMarkdown ( node , maxLength = Infinity ) {
/**
 * Check if a node is an HTML element.
 *
 * The check is made on `nodeType` only, so any node reporting
 * `Node.ELEMENT_NODE` passes.
 *
 * @param {Node} node
 * @returns {node is HTMLElement}
 */
function isHtmlElement(node) {
    return node.nodeType === Node.ELEMENT_NODE;
}
32+
33+ /**
34+ * Convert a DOM node to markdown
35+ * @param {Node } node
36+ * @param {number } maxLength
37+ * @param {string } excludeSelectors
38+ * @returns {string }
39+ */
40+ function domToMarkdown ( node , maxLength = Infinity , excludeSelectors ) {
1141 if ( node . nodeType === Node . TEXT_NODE ) {
1242 return collapseWhitespace ( node . textContent ) ;
1343 }
14- if ( node . nodeType !== Node . ELEMENT_NODE ) {
44+ if ( ! isHtmlElement ( node ) ) {
45+ return '' ;
46+ }
47+ if ( ! checkNodeIsVisible ( node ) || node . matches ( excludeSelectors ) ) {
1548 return '' ;
1649 }
1750
@@ -20,7 +53,7 @@ function domToMarkdown(node, maxLength = Infinity) {
2053 // Build children string incrementally to exit early when maxLength is exceeded
2154 let children = '' ;
2255 for ( const childNode of node . childNodes ) {
23- const childContent = domToMarkdown ( childNode , maxLength - children . length ) ;
56+ const childContent = domToMarkdown ( childNode , maxLength - children . length , excludeSelectors ) ;
2457 children += childContent ;
2558
2659 if ( children . length > maxLength ) {
@@ -333,6 +366,7 @@ export default class PageContext extends ContentFeature {
333366 'canvas' ,
334367 ] ;
335368 excludeSelectors = excludeSelectors . concat ( excludedInertElements ) ;
369+ const excludeSelectorsString = excludeSelectors . join ( ',' ) ;
336370
337371 let content = '' ;
338372 // Get content from main content areas
@@ -346,18 +380,8 @@ export default class PageContext extends ContentFeature {
346380
347381 if ( contentRoot ) {
348382 this . log . info ( 'Getting main content' , contentRoot ) ;
349- // Create a clone to work with
350- const clone = /** @type {Element } */ ( contentRoot . cloneNode ( true ) ) ;
351-
352- // Remove excluded elements
353- excludeSelectors . forEach ( ( selector ) => {
354- const elements = clone . querySelectorAll ( selector ) ;
355- elements . forEach ( ( el ) => el . remove ( ) ) ;
356- } ) ;
357-
358- this . log . info ( 'Calling domToMarkdown' , clone . innerHTML ) ;
359- content += domToMarkdown ( clone , upperLimit ) ;
360- this . log . info ( 'Content markdown' , content , clone , contentRoot ) ;
383+ content += domToMarkdown ( contentRoot , upperLimit , excludeSelectorsString ) ;
384+ this . log . info ( 'Content markdown' , content , contentRoot ) ;
361385 }
362386 content = content . trim ( ) ;
363387
@@ -375,8 +399,8 @@ export default class PageContext extends ContentFeature {
375399
376400 getHeadings ( ) {
377401 const headings = [ ] ;
378- const headdingSelector = this . getFeatureSetting ( 'headingSelector' ) || 'h1, h2, h3, h4, h5, h6' ;
379- const headingElements = document . querySelectorAll ( headdingSelector ) ;
402+ const headingSelector = this . getFeatureSetting ( 'headingSelector' ) || 'h1, h2, h3, h4, h5, h6' ;
403+ const headingElements = document . querySelectorAll ( headingSelector ) ;
380404
381405 headingElements . forEach ( ( heading ) => {
382406 const level = parseInt ( heading . tagName . charAt ( 1 ) ) ;
0 commit comments