@@ -14,7 +14,7 @@ import TabItem from '@theme/TabItem';
1414
1515---
1616
17- Now that we know how to execute scripts on a page, we're ready to learn a bit about [data extraction](../../scraping_basics_javascript/data_extraction/index.md). In this lesson, we'll be scraping all the on-sale products from our [Fakestore](https://demo-webstore.apify.org/search/on-sale) website.
17+ Now that we know how to execute scripts on a page, we're ready to learn a bit about [data extraction](../../scraping_basics_javascript/data_extraction/index.md). In this lesson, we'll be scraping all the on-sale products from [warehouse-theme-metal.myshopify.com](https://warehouse-theme-metal.myshopify.com/), a sample Shopify website.
1818
1919> Most web data extraction cases involve looping through a list of items of some sort.
2020
@@ -36,7 +36,7 @@ import { chromium } from 'playwright';
3636const browser = await chromium .launch ({ headless: false });
3737const page = await browser .newPage ();
3838
39- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
39+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
4040
4141// code will go here
4242
@@ -54,7 +54,7 @@ import puppeteer from 'puppeteer';
5454const browser = await puppeteer .launch ({ headless: false });
5555const page = await browser .newPage ();
5656
57- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
57+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
5858
5959// code will go here
6060
@@ -82,16 +82,12 @@ We'll be returning a bunch of product objects from this function, which will be
8282
8383``` js
8484const products = await page .evaluate (() => {
85- const productCards = Array .from (document .querySelectorAll (' a[class*="ProductCard_root"] ' ));
85+ const productCards = Array .from (document .querySelectorAll (' .product-item ' ));
8686
8787 return productCards .map ((element ) => {
88- const name = element .querySelector (' h3[class*="ProductCard_name"]' ).textContent ;
89- const price = element .querySelector (' div[class*="ProductCard_price"]' ).textContent ;
90-
91- return {
92- name,
93- price,
94- };
88+ const name = element .querySelector (' .product-item__title' ).textContent ;
89+ const price = element .querySelector (' .price' ).lastChild .textContent ;
90+ return { name, price };
9591 });
9692});
9793
@@ -100,7 +96,20 @@ console.log(products);
10096
10197When we run this code, we see this logged to our console:
10298
103- ![Products logged to the console](./images/log-products.png)
99+ ``` text
100+ $ node index.js
101+ [
102+ {
103+ name: 'JBL Flip 4 Waterproof Portable Bluetooth Speaker',
104+ price: '$74.95'
105+ },
106+ {
107+ name: 'Sony XBR-950G BRAVIA 4K HDR Ultra HD TV',
108+ price: 'From $1,398.00'
109+ },
110+ ...
111+ ]
112+ ```
104113
105114## Using jQuery {#using-jquery}
106115
@@ -118,19 +127,12 @@ Now, since we're able to use jQuery, let's translate our vanilla JavaScript code
118127await page .addScriptTag ({ url: ' https://code.jquery.com/jquery-3.6.0.min.js' });
119128
120129const products = await page .evaluate (() => {
121- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
122-
123- return productCards .map ((element ) => {
124- const card = $ (element);
125-
126- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
127- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
128-
129- return {
130- name,
131- price,
132- };
133- });
130+ return Array .from ($ (' .product-item' ).map (function () {
131+ const card = $ (this );
132+ const name = card .find (' .product-item__title' ).text ();
133+ const price = card .find (' .price' ).contents ().last ().text ();
134+ return { name, price };
135+ }));
134136});
135137
136138console .log (products);
@@ -178,7 +180,7 @@ import { load } from 'cheerio';
178180const browser = await chromium .launch ({ headless: false });
179181const page = await browser .newPage ();
180182
181- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
183+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
182184
183185const $ = load (await page .content ());
184186
@@ -197,7 +199,7 @@ import { load } from 'cheerio';
197199const browser = await puppeteer .launch ({ headless: false });
198200const page = await browser .newPage ();
199201
200- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
202+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
201203
202204const $ = load (await page .content ());
203205
@@ -214,19 +216,12 @@ Now, to loop through all of the products, we'll make use of the `$` object and l
214216``` js
215217const $ = load (await page .content ());
216218
217- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
218-
219- const products = productCards .map ((element ) => {
220- const card = $ (element);
221-
222- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
223- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
224-
225- return {
226- name,
227- price,
228- };
229- });
219+ const products = Array .from ($ (' .product-item' ).map (function () {
220+ const card = $ (this );
221+ const name = card .find (' .product-item__title' ).text ();
222+ const price = card .find (' .price' ).contents ().last ().text ();
223+ return { name, price };
224+ }));
230225
231226console .log (products);
232227```
@@ -245,23 +240,16 @@ import { load } from 'cheerio';
245240const browser = await chromium .launch ({ headless: false });
246241const page = await browser .newPage ();
247242
248- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
243+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
249244
250245const $ = load (await page .content ());
251246
252- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
253-
254- const products = productCards .map ((element ) => {
255- const card = $ (element);
256-
257- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
258- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
259-
260- return {
261- name,
262- price,
263- };
264- });
247+ const products = Array .from ($ (' .product-item' ).map (function () {
248+ const card = $ (this );
249+ const name = card .find (' .product-item__title' ).text ();
250+ const price = card .find (' .price' ).contents ().last ().text ();
251+ return { name, price };
252+ }));
265253
266254console .log (products);
267255
@@ -278,23 +266,16 @@ import { load } from 'cheerio';
278266const browser = await puppeteer .launch ({ headless: false });
279267const page = await browser .newPage ();
280268
281- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
269+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
282270
283271const $ = load (await page .content ());
284272
285- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
286-
287- const products = productCards .map ((element ) => {
288- const card = $ (element);
289-
290- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
291- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
292-
293- return {
294- name,
295- price,
296- };
297- });
273+ const products = Array .from ($ (' .product-item' ).map (function () {
274+ const card = $ (this );
275+ const name = card .find (' .product-item__title' ).text ();
276+ const price = card .find (' .price' ).contents ().last ().text ();
277+ return { name, price };
278+ }));
298279
299280console .log (products);
300281
0 commit comments