@@ -123,6 +123,13 @@ export default class Interpreter extends EventEmitter {
123123 this . isAborted = true ;
124124 }
125125
/**
 * Exposes the interpreter's abort flag to callers.
 *
 * @returns the current value of the `isAborted` field.
 */
public getIsAborted(): boolean {
  return this.isAborted;
}
132+
126133 private async applyAdBlocker ( page : Page ) : Promise < void > {
127134 if ( this . blocker ) {
128135 try {
@@ -610,6 +617,13 @@ export default class Interpreter extends EventEmitter {
610617
611618 if ( methodName === 'waitForLoadState' ) {
612619 try {
// Normalise the step arguments so waitForLoadState always receives an
// explicit 30s timeout as its options argument. A single-element array or a
// bare (non-array) value is extended with the options object; arrays of any
// other length are passed through untouched.
let args = step.args;

if (Array.isArray(args) && args.length === 1) {
  args = [args[0], { timeout: 30000 }];
} else if (!Array.isArray(args)) {
  args = [args, { timeout: 30000 }];
}

// Pass the normalised `args`, not the raw `step.args`; otherwise the timeout
// computed above is silently discarded.
await executeAction(invokee, methodName, args);
614628 } catch ( error ) {
615629 await executeAction ( invokee , methodName , 'domcontentloaded' ) ;
@@ -670,7 +684,19 @@ export default class Interpreter extends EventEmitter {
670684 return ;
671685 }
672686
// Run the in-page list scrape, but give up after 10 seconds so a hung page
// cannot stall the caller.
let evaluationTimer: ReturnType<typeof setTimeout> | undefined;
const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
const timeoutPromise = new Promise<any[]>((_, reject) => {
  evaluationTimer = setTimeout(() => reject(new Error('Page evaluation timeout')), 10000);
});

let results;
try {
  results = await Promise.race([evaluationPromise, timeoutPromise]);
} catch (error) {
  debugLog(`Page evaluation failed: ${error.message}`);
  return;
} finally {
  // Always cancel the watchdog; otherwise every call leaves a live 10s timer
  // behind even when the evaluation finished immediately.
  clearTimeout(evaluationTimer);
}
699+
674700 const newResults = results . filter ( item => {
675701 const uniqueKey = JSON . stringify ( item ) ;
676702 if ( scrapedItems . has ( uniqueKey ) ) return false ;
@@ -691,43 +717,94 @@ export default class Interpreter extends EventEmitter {
691717 return false ;
692718 } ;
693719
// Heuristic: treat a selector as XPath when it starts like an XPath path
// expression or contains one of a few XPath-only fragments.
const isXPathSelector = (selector: string): boolean => {
  const xpathPrefixes = ['//', '/', './'];
  const xpathFragments = ['contains(@', '[count(', '@class=', '@id=', ' and ', ' or '];

  return (
    xpathPrefixes.some((prefix) => selector.startsWith(prefix)) ||
    xpathFragments.some((fragment) => selector.includes(fragment))
  );
};
732+
// Waits until `selector` (CSS or XPath) is attached to the DOM and resolves
// to a handle for it, or to null when the wait fails for any reason
// (timeout, invalid selector, ...).
//
// XPath selectors are routed through the same page.waitForSelector call as
// CSS ones by prefixing them with Playwright's `xpath=` engine name. This
// keeps both kinds consistent: the first match is returned when several
// elements match, instead of a strict locator rejecting the multi-match, and
// the whole lookup is bounded by a single timeout.
//
// options.timeout: wait budget in milliseconds; falsy values fall back to 10000.
const waitForSelectorUniversal = async (selector: string, options: any = {}): Promise<ElementHandle | null> => {
  try {
    const engineSelector = isXPathSelector(selector) ? `xpath=${selector}` : selector;

    return await page.waitForSelector(engineSelector, {
      state: 'attached',
      timeout: options.timeout || 10000
    });
  } catch (error) {
    // Deliberate best-effort: callers treat null as "not found" and handle
    // retries and logging themselves.
    return null;
  }
};
755+
694756 // Enhanced button finder with retry mechanism
695- const findWorkingButton = async ( selectors : string [ ] ) : Promise < {
696- button : ElementHandle | null ,
757+ const findWorkingButton = async ( selectors : string [ ] ) : Promise < {
758+ button : ElementHandle | null ,
697759 workingSelector : string | null ,
698760 updatedSelectors : string [ ]
699761 } > => {
700- let updatedSelectors = [ ...selectors ] ;
701-
762+ const startTime = Date . now ( ) ;
763+ const MAX_BUTTON_SEARCH_TIME = 15000 ;
764+ let updatedSelectors = [ ...selectors ] ;
765+
702766 for ( let i = 0 ; i < selectors . length ; i ++ ) {
767+ if ( Date . now ( ) - startTime > MAX_BUTTON_SEARCH_TIME ) {
768+ debugLog ( `Button search timeout reached (${ MAX_BUTTON_SEARCH_TIME } ms), aborting` ) ;
769+ break ;
770+ }
703771 const selector = selectors [ i ] ;
704772 let retryCount = 0 ;
705773 let selectorSuccess = false ;
706774
707775 while ( retryCount < MAX_RETRIES && ! selectorSuccess ) {
708776 try {
709- const button = await page . waitForSelector ( selector , {
710- state : 'attached' ,
711- timeout : 10000
712- } ) ;
713-
777+ const button = await waitForSelectorUniversal ( selector , { timeout : 2000 } ) ;
778+
714779 if ( button ) {
715780 debugLog ( 'Found working selector:' , selector ) ;
716- return {
717- button,
781+ return {
782+ button,
718783 workingSelector : selector ,
719- updatedSelectors
784+ updatedSelectors
720785 } ;
786+ } else {
787+ retryCount ++ ;
788+ debugLog ( `Selector "${ selector } " not found: attempt ${ retryCount } /${ MAX_RETRIES } ` ) ;
789+
790+ if ( retryCount < MAX_RETRIES ) {
791+ await page . waitForTimeout ( RETRY_DELAY ) ;
792+ } else {
793+ debugLog ( `Removing failed selector "${ selector } " after ${ MAX_RETRIES } attempts` ) ;
794+ updatedSelectors = updatedSelectors . filter ( s => s !== selector ) ;
795+ selectorSuccess = true ;
796+ }
721797 }
722798 } catch ( error ) {
723799 retryCount ++ ;
724- debugLog ( `Selector "${ selector } " failed : attempt ${ retryCount } /${ MAX_RETRIES } ` ) ;
725-
800+ debugLog ( `Selector "${ selector } " error : attempt ${ retryCount } /${ MAX_RETRIES } - ${ error . message } ` ) ;
801+
726802 if ( retryCount < MAX_RETRIES ) {
727803 await page . waitForTimeout ( RETRY_DELAY ) ;
728804 } else {
729805 debugLog ( `Removing failed selector "${ selector } " after ${ MAX_RETRIES } attempts` ) ;
730806 updatedSelectors = updatedSelectors . filter ( s => s !== selector ) ;
807+ selectorSuccess = true ;
731808 }
732809 }
733810 }
@@ -1347,9 +1424,35 @@ export default class Interpreter extends EventEmitter {
13471424 }
13481425
/**
 * Makes sure the browser-side scraper helpers are registered for `page`.
 *
 * Checks (with a 3 second cap) whether all expected `window.*` helper
 * functions exist. If any is missing, or the check itself fails or times
 * out, the scraper script is registered via `page.addInitScript`. This
 * method never throws: failures are logged instead.
 *
 * @param page - the page to check and, if needed, register the script on.
 */
private async ensureScriptsLoaded(page: Page) {
  let isScriptLoaded = false;
  let checkTimer: ReturnType<typeof setTimeout> | undefined;

  try {
    const evaluationPromise = page.evaluate(() =>
      typeof window.scrape === 'function' &&
      typeof window.scrapeSchema === 'function' &&
      typeof window.scrapeList === 'function' &&
      typeof window.scrapeListAuto === 'function' &&
      typeof window.scrollDown === 'function' &&
      typeof window.scrollUp === 'function'
    );

    const timeoutPromise = new Promise<boolean>((_, reject) => {
      checkTimer = setTimeout(() => reject(new Error('Script check timeout')), 3000);
    });

    isScriptLoaded = await Promise.race([evaluationPromise, timeoutPromise]);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    this.log(`Script check failed, adding script anyway: ${reason}`, Level.WARN);
  } finally {
    // Cancel the watchdog so a fast check does not leave a live timer behind.
    clearTimeout(checkTimer);
  }

  if (isScriptLoaded) {
    return;
  }

  // Single injection path for both "helpers missing" and "check failed", so
  // an injection failure is reported as such rather than as a failed check.
  try {
    await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
  } catch (scriptError) {
    const reason = scriptError instanceof Error ? scriptError.message : String(scriptError);
    this.log(`Failed to add script: ${reason}`, Level.ERROR);
  }
}
13551458
0 commit comments