-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_lq.R
34 lines (26 loc) · 864 Bytes
/
get_lq.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Install fs only when it is not already available. requireNamespace()
# probes this single package directly, instead of building the table of
# every installed package just to look up one name.
if (!requireNamespace("fs", quietly = TRUE)) {
  install.packages("fs", repos = "https://cran.rstudio.com/")
}
library(rvest)
library(purrr)
library(magrittr)
library(dplyr)
library(fs)
# Base location that the listing page and every linked page hang off.
base_url <- "http://www2.stat.duke.edu/~cr173/lq/www.lq.com/en/findandbook/"

# Parse the listing page that links to the individual hotel pages.
page <- read_html(paste0(base_url, "hotel-listings.html"))

# Collect the href of every anchor inside the listing container, dropping
# anchors that carry no href attribute.
hrefs <- page %>%
  html_nodes("#hotelListing .col-sm-12 a") %>%
  html_attr("href") %>%
  discard(is.na)

# paste0() treats a zero-length argument as "", so an empty `hrefs` would
# silently yield `base_url` itself as the only "hotel" URL. Fail loudly.
if (length(hrefs) == 0) {
  stop(
    "No hotel links found on the listing page; nothing to download.",
    call. = FALSE
  )
}

# The hrefs are appended to the base location to form the download URLs.
urls <- paste0(base_url, hrefs)
# Directory that receives one downloaded file per URL.
output_dir <- "data/lq"

# `recurse` is the current name of this argument; `recursive` is deprecated
# in fs::dir_create().
fs::dir_create(output_dir, recurse = TRUE)

# Progress tracker sized to the number of downloads.
p <- dplyr::progress_estimated(length(urls))

# Download every URL into output_dir, naming each file after the last path
# component of its URL. walk() is used because only the side effect matters.
purrr::walk(
  urls,
  function(url) {
    download.file(
      url,
      destfile = fs::path(output_dir, fs::path_file(url)),
      quiet = TRUE
    )
    cat(".")
    p$tick()$print()
  }
)