Skip to content

Commit 694123b

Browse files
committed
first commit
0 parents  commit 694123b

File tree

6 files changed

+237
-0
lines changed

6 files changed

+237
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Cargo writes its lockfile as `Cargo.lock`; the pattern must use the same casing to match
# when git compares paths case-sensitively.
Cargo.lock
# Cargo build output directory.
target
# Local WebDriver binary.
chromedriver.exe

airbnb/Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Manifest of the `airbnb` library crate.
[package]
name = "airbnb"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# CSV output of the scraped listings.
csv = "1.2.2"
# `derive` enables `#[derive(Serialize)]` on the record struct.
serde = { version = "1.0.188", features = ["derive"] }
# WebDriver client used to drive the browser.
thirtyfour = "0.31.0"
tokio = { version = "1.32.0", features = ["full"] }
# Parsing of the start URL.
url = "2.4.1"

airbnb/src/lib.rs

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
use std::error::Error;
2+
use std::thread;
3+
use std::time::Duration;
4+
use thirtyfour::{prelude::WebDriverError, By, DesiredCapabilities, WebDriver, WebElement};
5+
use url::Url;
6+
use serde::Serialize;
7+
8+
pub async fn scrape_airbnb(place: &str) -> Result<(), Box<dyn Error>> {
9+
let driver = initialize_driver().await?;
10+
let url = Url::parse("https://www.airbnb.it/")?;
11+
12+
driver.goto(url).await?;
13+
thread::sleep(Duration::from_secs(2));
14+
15+
search_location(&driver, place).await?;
16+
thread::sleep(Duration::from_secs(2));
17+
18+
scrape_all(driver).await?;
19+
20+
Ok(())
21+
}
22+
23+
async fn scrape_all(driver: WebDriver) -> Result<(), Box<dyn Error>> {
24+
driver
25+
.execute("window.scrollTo(0, document.body.scrollHeight);", vec![])
26+
.await?;
27+
thread::sleep(Duration::from_secs(1));
28+
29+
let mut wtr = csv::Writer::from_path("airbnb.csv")?;
30+
31+
loop {
32+
if let Ok(next_page_button) = driver.find(By::Css("#site-content > div > div.p1szzjq8.dir.dir-ltr > div > div > div > nav > div > a.l1ovpqvx.c1ytbx3a.dir.dir-ltr")).await {
33+
34+
match next_page_button.is_clickable().await? {
35+
true => {
36+
37+
let house_elems = get_house_elements(&driver).await?;
38+
39+
for house_elem in house_elems {
40+
41+
let bnb_details = BnbDetails::from(house_elem).await?;
42+
43+
wtr.serialize(bnb_details)?;
44+
45+
}
46+
load_next_page(next_page_button, &driver).await?;
47+
}
48+
false => {
49+
break
50+
},
51+
}
52+
} else {
53+
let house_elems = get_house_elements(&driver).await?;
54+
55+
for house_elem in house_elems {
56+
57+
let bnb_details = BnbDetails::from(house_elem).await?;
58+
wtr.serialize(bnb_details)?;
59+
}
60+
break;
61+
}
62+
}
63+
Ok(())
64+
}
65+
66+
async fn get_house_elements(driver: &WebDriver) -> Result<Vec<WebElement>, WebDriverError> {
67+
driver.find_all(By::Css("#site-content > div > div:nth-child(2) > div > div > div > div > div.gsgwcjk.g8ge8f1.g14v8520.dir.dir-ltr > div.dir.dir-ltr > div > div.c1l1h97y.dir.dir-ltr > div > div > div > div.cy5jw6o.dir.dir-ltr > div > div.g1qv1ctd.c1v0rf5q.dir.dir-ltr")).await
68+
}
69+
70+
async fn load_next_page(
71+
next_page_button: WebElement,
72+
driver: &WebDriver,
73+
) -> Result<(), Box<dyn Error>> {
74+
next_page_button.click().await?;
75+
thread::sleep(Duration::from_secs(2));
76+
77+
driver
78+
.execute("window.scrollTo(0, document.body.scrollHeight);", vec![])
79+
.await?;
80+
thread::sleep(Duration::from_secs(1));
81+
82+
Ok(())
83+
}
84+
85+
/// Opens a Chrome session through the WebDriver endpoint at `http://localhost:9515` and
/// maximizes its window.
///
/// # Errors
///
/// Propagates the WebDriver error if the session cannot be created or the window cannot
/// be maximized.
async fn initialize_driver() -> Result<WebDriver, WebDriverError> {
    let chrome = DesiredCapabilities::chrome();
    let session = WebDriver::new("http://localhost:9515", chrome).await?;
    session.maximize_window().await?;
    Ok(session)
}
91+
92+
/// Fills in the search form with `place` and submits it.
///
/// Runs three steps in order: `click_choose_place`, `write_place`, and a click on the
/// button inside `#search-tabpanel`.
///
/// # Errors
///
/// Propagates the first WebDriver error raised by any of the steps.
async fn search_location(driver: &WebDriver, place: &str) -> Result<(), WebDriverError> {
    click_choose_place(driver).await?;
    write_place(driver, place).await?;

    let submit_button = driver
        .find(By::Css("#search-tabpanel > div.i1flv5qo.dir.dir-ltr > div.c6ezw63.c1geg2ah.dir.dir-ltr > div.c192dx2b.ckzf1ch.dir.dir-ltr > div.s31emu3.dir.dir-ltr > button"))
        .await?;
    submit_button.click().await
}
101+
102+
async fn click_choose_place(driver: &WebDriver) -> Result<(), WebDriverError> {
103+
driver
104+
.find(By::Css("body > div:nth-child(8) > div > div > div:nth-child(1) > div > div.cd56ld.cb80sj1.dir.dir-ltr > div.h1ta6hky.dir.dir-ltr > div > div > div > header > div > div.cb994eh.dir.dir-ltr > div.lkm6i7z.lr5v90m.l1rzxhu2.l1kj223i.dir.dir-ltr > div > span.ij8oydg.dir.dir-ltr > button:nth-child(1)"))
105+
.await?.click().await?;
106+
107+
Ok(())
108+
}
109+
110+
async fn write_place(driver: &WebDriver, place: &str) -> Result<(), WebDriverError> {
111+
let input = driver
112+
.find(By::Css("#bigsearch-query-location-input"))
113+
.await?;
114+
while !input.is_clickable().await? {
115+
thread::sleep(Duration::from_millis(100));
116+
}
117+
input.send_keys(place).await?;
118+
119+
Ok(())
120+
}
121+
122+
#[derive(Debug, Serialize)]
123+
struct BnbDetails {
124+
title: String,
125+
description: String,
126+
host: String,
127+
availability: String,
128+
price: String,
129+
star: String,
130+
}
131+
132+
impl BnbDetails {
133+
async fn from(house_elem: WebElement) -> Result<Self, WebDriverError> {
134+
let title = BnbDetails::get_title(&house_elem).await?;
135+
let description = BnbDetails::get_description(&house_elem).await?;
136+
let host = BnbDetails::get_host(&house_elem).await?;
137+
let availability = BnbDetails::get_availability(&house_elem).await?;
138+
let price = BnbDetails::get_price(&house_elem).await?;
139+
let star = BnbDetails::get_star(&house_elem).await?;
140+
141+
Ok(Self {
142+
title,
143+
description,
144+
host,
145+
availability,
146+
price,
147+
star,
148+
})
149+
}
150+
async fn get_title(house_elem: &WebElement) -> Result<String, WebDriverError> {
151+
house_elem
152+
.find(By::Css("div:nth-child(1)"))
153+
.await?
154+
.text()
155+
.await
156+
}
157+
async fn get_description(house_elem: &WebElement) -> Result<String, WebDriverError> {
158+
house_elem
159+
.find(By::Css("div:nth-child(2) > span"))
160+
.await?
161+
.text()
162+
.await
163+
}
164+
async fn get_host(house_elem: &WebElement) -> Result<String, WebDriverError> {
165+
let host = house_elem
166+
.find(By::Css("div:nth-child(3) > span > span"))
167+
.await;
168+
if let Ok(host) = host {
169+
host.text().await
170+
} else {
171+
house_elem
172+
.find(By::Css("div:nth-child(3) > span"))
173+
.await?
174+
.text()
175+
.await
176+
}
177+
}
178+
async fn get_availability(house_elem: &WebElement) -> Result<String, WebDriverError> {
179+
house_elem
180+
.find(By::Css("div:nth-child(4) > span > span"))
181+
.await?
182+
.text()
183+
.await
184+
}
185+
async fn get_price(house_elem: &WebElement) -> Result<String, WebDriverError> {
186+
house_elem
187+
.find(By::XPath("div[5]/div/div/span[1]/div/span[1]"))
188+
.await?
189+
.text()
190+
.await
191+
}
192+
async fn get_star(house_elem: &WebElement) -> Result<String, WebDriverError> {
193+
if let Ok(star) = house_elem
194+
.find(By::Css("span > span.r1dxllyb.dir.dir-ltr"))
195+
.await
196+
{
197+
return star.text().await;
198+
}
199+
Ok("No ratings available".into())
200+
}
201+
}

cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Workspace root manifest grouping the `scraper` and `airbnb` crates.
[workspace]
# Keys in this table are already scoped to `workspace`, so the resolver is set with the bare
# `resolver` key. Writing `workspace.resolver` here defines `workspace.workspace.resolver`,
# which Cargo does not read.
resolver = "2"
members = ["scraper", "airbnb"]

scraper/Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Manifest of the `scraper` binary crate.
[package]
name = "scraper"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# Sibling library crate that provides `scrape_airbnb`.
airbnb = { path = "../airbnb" }
# Provides the `#[tokio::main]` runtime entry point.
tokio = { version = "1.32.0", features = ["full"] }

scraper/src/main.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
use std::error::Error;
2+
3+
#[tokio::main]
4+
async fn main() -> Result<(),Box<dyn Error>>{
5+
airbnb::scrape_airbnb("Rome").await?;
6+
Ok(())
7+
}

0 commit comments

Comments
 (0)