Skip to content

Commit 5038f6d

Browse files
committed
airbnb ✅
1 parent 694123b commit 5038f6d

File tree

2 files changed

+31
-10
lines changed

2 files changed

+31
-10
lines changed

airbnb/src/lib.rs

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::error::Error;
22
use std::thread;
33
use std::time::Duration;
4-
use thirtyfour::{prelude::WebDriverError, By, DesiredCapabilities, WebDriver, WebElement};
4+
use thirtyfour::{prelude::{WebDriverError, ElementWaitable}, By, DesiredCapabilities, WebDriver, WebElement};
55
use url::Url;
66
use serde::Serialize;
77

@@ -33,7 +33,9 @@ async fn scrape_all(driver: WebDriver) -> Result<(), Box<dyn Error>> {
3333

3434
match next_page_button.is_clickable().await? {
3535
true => {
36-
36+
37+
//start extracting data
38+
3739
let house_elems = get_house_elements(&driver).await?;
3840

3941
for house_elem in house_elems {
@@ -63,14 +65,11 @@ async fn scrape_all(driver: WebDriver) -> Result<(), Box<dyn Error>> {
6365
Ok(())
6466
}
6567

66-
async fn get_house_elements(driver: &WebDriver) -> Result<Vec<WebElement>, WebDriverError> {
67-
driver.find_all(By::Css("#site-content > div > div:nth-child(2) > div > div > div > div > div.gsgwcjk.g8ge8f1.g14v8520.dir.dir-ltr > div.dir.dir-ltr > div > div.c1l1h97y.dir.dir-ltr > div > div > div > div.cy5jw6o.dir.dir-ltr > div > div.g1qv1ctd.c1v0rf5q.dir.dir-ltr")).await
68-
}
69-
7068
async fn load_next_page(
7169
next_page_button: WebElement,
7270
driver: &WebDriver,
7371
) -> Result<(), Box<dyn Error>> {
72+
7473
next_page_button.click().await?;
7574
thread::sleep(Duration::from_secs(2));
7675

@@ -82,6 +81,10 @@ async fn load_next_page(
8281
Ok(())
8382
}
8483

84+
/// Collects the house elements on the currently loaded page.
///
/// Runs `find_all` on `driver` with a single hard-coded CSS selector rooted at
/// `#site-content` and returns every matching `WebElement`. Any
/// `WebDriverError` raised by the lookup is returned to the caller unchanged.
///
/// NOTE(review): the selector chains class names such as `gsgwcjk` and
/// `g1qv1ctd` that look machine-generated; presumably they change when the
/// site is redeployed — confirm before relying on this selector long-term.
async fn get_house_elements(driver: &WebDriver) -> Result<Vec<WebElement>, WebDriverError> {
85+
driver.find_all(By::Css("#site-content > div > div:nth-child(2) > div > div > div > div > div.gsgwcjk.g8ge8f1.g14v8520.dir.dir-ltr > div.dir.dir-ltr > div > div.c1l1h97y.dir.dir-ltr > div > div > div > div.cy5jw6o.dir.dir-ltr > div > div.g1qv1ctd.c1v0rf5q.dir.dir-ltr")).await
86+
}
87+
8588
async fn initialize_driver() -> Result<WebDriver, WebDriverError> {
8689
let caps = DesiredCapabilities::chrome();
8790
let driver = WebDriver::new("http://localhost:9515", caps).await?;
@@ -94,8 +97,13 @@ async fn search_location(driver: &WebDriver, place: &str) -> Result<(), WebDrive
9497

9598
write_place(driver, place).await?;
9699

97-
driver.find(By::Css("#search-tabpanel > div.i1flv5qo.dir.dir-ltr > div.c6ezw63.c1geg2ah.dir.dir-ltr > div.c192dx2b.ckzf1ch.dir.dir-ltr > div.s31emu3.dir.dir-ltr > button")).await?.click().await?;
100+
click_search_button(driver).await?;
101+
102+
Ok(())
103+
}
98104

105+
/// Finds the search button and clicks it.
///
/// The button is located with a hard-coded CSS selector rooted at
/// `#search-tabpanel`. A `WebDriverError` from either the lookup or the click
/// is propagated with `?`; on success the function returns `Ok(())`.
async fn click_search_button(driver: &WebDriver) -> Result<(), WebDriverError> {
106+
driver.find(By::Css("#search-tabpanel > div.i1flv5qo.dir.dir-ltr > div.c6ezw63.c1geg2ah.dir.dir-ltr > div.c192dx2b.ckzf1ch.dir.dir-ltr > div.s31emu3.dir.dir-ltr > button")).await?.click().await?;
99107
Ok(())
100108
}
101109

@@ -111,9 +119,8 @@ async fn write_place(driver: &WebDriver, place: &str) -> Result<(), WebDriverErr
111119
let input = driver
112120
.find(By::Css("#bigsearch-query-location-input"))
113121
.await?;
114-
while !input.is_clickable().await? {
115-
thread::sleep(Duration::from_millis(100));
116-
}
122+
input.wait_until().clickable().await?;
123+
117124
input.send_keys(place).await?;
118125

119126
Ok(())

readme.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Web Scraping using rust
2+
This repository will contain the source code for my series about web scraping using Rust.
3+
4+
[Scraping airbnb](https://itehax.com/blog/web-scraping-using-rust)
5+
6+
Currently the repository contains
7+
:
8+
- [x] Airbnb scraping.
9+
- [ ] Anna's archive (bypass cloudflare and undetected chromedriver).
10+
- [ ] Downloading video from an mp4 file.
11+
- [ ] Downloading video from a streaming website (parse m3u8 playlist and extract js scripts).
12+
13+
14+

0 commit comments

Comments
 (0)