11require ( "./extensions.js" ) ;
22const typeOf = require ( "typeof" ) ;
3+ const objectMerge = require ( 'object-merge' ) ;
34
45const Runner = require ( './Runner.js' ) ;
56
@@ -8,24 +9,52 @@ const config = require('./config.js');
89
// Shared object that the runner handlers in this file pass to every scrapper
// callback under the `runners` key.
let runners = {};
1011
11- const puppeteerRunner = new Runner ( 'puppeteer' , async ( url , scrapper ) => {
12+ const puppeteerRunner = new Runner ( 'puppeteer' , async ( { url, scrapper, options = { } } = { } ) => {
1213 const puppeteer = require ( 'puppeteer' ) ;
1314
14- const browser = await puppeteer . launch ( config . puppeteer ) ;
15+ let _options = {
16+ config : {
17+ headless : false
18+ } ,
19+ requestInterception : {
20+ active : true ,
21+ block : ( { request} ) => request . resourceType === 'font'
22+ }
23+ } ;
24+ _options = objectMerge ( _options , options . puppeteer ) ;
25+ _options . config = objectMerge ( _options . config , config . puppeteer ) ;
26+
27+ const browser = await puppeteer . launch ( _options . config ) ;
1528 const page = await browser . newPage ( ) ;
1629
1730 await page . emulateMedia ( 'screen' ) ;
1831
19- await page . setRequestInterception ( true ) ;
32+ await page . setRequestInterception ( _options . requestInterception . active ) ;
2033 page . on ( 'request' , request => {
21- if ( //request.resourceType === 'image' ||
22- //request.resourceType === 'stylesheet' ||
23- request . resourceType === 'font' )
34+ const block = _options . requestInterception . block && _options . requestInterception . block ( {
35+ request : request ,
36+ resourceType : request . resourceType ( ) ,
37+ url : request . url ( ) ,
38+ page : page ,
39+ browser : browser ,
40+ puppeteer : puppeteer
41+ } ) ;
42+
43+ if ( typeOf ( block ) === 'undefined' )
44+ return ;
45+ if ( block )
2446 request . abort ( ) ;
2547 else
2648 request . continue ( ) ;
2749 } ) ;
2850
51+ if ( _options . init )
52+ _options . init ( {
53+ page : page ,
54+ browser : browser ,
55+ puppeteer : puppeteer
56+ } ) ;
57+
2958 /*await page.evaluateOnNewDocument(async () => {
3059 HTMLVideoElement.prototype.canPlayType = function () { return "probably"; };
3160 });*/
@@ -44,37 +73,45 @@ const puppeteerRunner = new Runner('puppeteer', async (url, scrapper) => {
4473
4574 return ret ;
4675} ) ;
/**
 * Runner that downloads a page with axios and hands the response body to the
 * scrapper.
 *
 * The handler takes one options bag:
 *   - `url`      address to request.
 *   - `scrapper` async callback; it is called with
 *                `{ url, html, response, scrapper, runners }` and its result
 *                becomes the handler's result.
 *   - `options`  optional; when `options.axios.config` is set it is merged
 *                over the default request config (a GET carrying browser-like
 *                Accept / User-Agent headers).
 *
 * A rejection from `axios.request` or from the scrapper propagates to the caller.
 */
const htmlRunner = new Runner('html', async ({ url, scrapper, options = {} } = {}) => {
    const axios = require('axios');

    // Named `requestConfig` so the module-level `config` (./config.js) is not shadowed.
    let requestConfig = {
        url: url,
        method: 'get',
        headers: {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"
        }
    };

    // The `options = {}` default only covers `undefined`; guard an explicit `null` as well.
    const overrides = options && options.axios && options.axios.config;
    if (overrides)
        requestConfig = objectMerge(requestConfig, overrides);

    const response = await axios.request(requestConfig);

    return await scrapper({
        url: url,
        html: response.data,
        response: response,
        scrapper: scrapper,
        runners: runners
    });
});
/**
 * Runner that fetches a page through `htmlRunner` and parses the returned
 * HTML with `htmlstringparser` before calling the scrapper.
 *
 * The handler takes one options bag:
 *   - `url`      address to request (forwarded to `htmlRunner`).
 *   - `scrapper` async callback; it receives everything `htmlRunner` provides
 *                plus `parser` (the HTMLStringParser class) and `dom` (a
 *                parser instance built from `html`).
 *   - `options`  forwarded unchanged to `htmlRunner`.
 */
const domRunner = new Runner('dom', async ({ url, scrapper, options = {} } = {}) => {
    const { 'default': HTMLStringParser } = require("htmlstringparser");

    return await htmlRunner.run({
        url: url,
        scrapper: async (args) => {
            const dom = new HTMLStringParser(args.html);

            // Build a fresh argument object rather than mutating the one that
            // htmlRunner created. `scrapper` is reset to the caller's own
            // callback: htmlRunner fills that key with the function it was
            // given, which here is this internal wrapper (assuming Runner#run
            // forwards its argument unchanged — TODO confirm).
            return await scrapper(Object.assign({}, args, {
                parser: HTMLStringParser,
                dom: dom,
                scrapper: scrapper
            }));
        },
        options: options
    });
});
77- const urlRunner = new Runner ( 'url' , async ( url , scrapper ) => {
114+ const urlRunner = new Runner ( 'url' , async ( { url, scrapper, options = { } } = { } ) => {
78115 return await scrapper ( {
79116 url : url ,
80117 scrapper : scrapper ,
0 commit comments