# Prefix a site-relative path with the bundestag.de host.
mk_absolute_url <- function(path) {
  paste0("https://www.bundestag.de", path)
}

# Build the absolute URL of the open-data filter list page that starts at
# the given result offset. `offset` is formatted with "%d", so it has to be
# a whole number.
mk_url <- function(offset) {
  mk_absolute_url(
    sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
  )
}

# Download a single protocol file and advance the progress bar.
#
# path         site-relative path of the XML resource
# name         file name to store the download under
# download_dir target directory; `name` is appended verbatim, so it must
#              already end with a file separator
#
# Relies on the global `pb` and `progress` objects set up by fetch_all().
# A failed download is reported as a warning and does not abort the run; the
# progress counter is advanced either way.
download_protocol <- function(path, name, download_dir) {
  fp <- paste0(download_dir, name)
  tryCatch(
    utils::download.file(mk_absolute_url(path), fp, quiet = TRUE),
    error = function(e) {
      warning(
        "Failed to download ", path, ": ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
  progress <<- progress + 1
  setTimerProgressBar(pb, progress)
}

# Fetch the result page starting at `offset` and download every protocol
# linked on it.
#
# Returns TRUE if the page contained at least one protocol link and FALSE
# otherwise, which fetch_all() uses as its stop condition.
fetch_batch <- function(offset, download_dir) {
  stopifnot("Offset must be numeric" = is.numeric(offset))

  page <- as.character(rvest::read_html(mk_url(offset)))

  # Column 1 holds the full resource path, column 2 the captured file name.
  paths <- str_match_all(
    page,
    "/resource/blob/.*?/([0-9]*-data\\.xml)"
  )[[1]]

  for (i in seq_len(nrow(paths))) {
    download_protocol(paths[i, 1], paths[i, 2], download_dir)
  }

  nrow(paths) > 0
}

# TODO: error handling
# - what if: page not reachable
# - wrong format, etc.

#' Download available records
#'
#' This fetches all available records of the 19th legislative period of the
#' German Bundestag, batch by batch, until a result page contains no further
#' record links.
#'
#' @param download_dir character. Directory the records are written to.
#' @param create bool. If `TRUE`, the directory given in `download_dir` is
#'   created.
#'
#' @return Called for its side effects (downloaded files, console output);
#'   the value of the final progress bar update is returned.
#'
#' @export
fetch_all <- function(download_dir = "inst/records/", create = FALSE) {
  # append file separator if needed
  download_dir <- make_directory_path(download_dir)
  check_directory(download_dir, create)

  cat("Fetching all available records from bundestag.de. ",
      "This may take a while ...\n", sep = "")

  # The progress bar and its counter live in the global environment because
  # download_protocol() updates them from there.
  pb <<- timerProgressBar(min = 0, max = 250, width = 40, char = "+")
  progress <<- 0

  # Always close the progress bar (also on error), but only report "Done."
  # when every batch was processed.
  completed <- FALSE
  on.exit(
    {
      close(pb)
      if (completed) cat("Done.\n")
    },
    add = TRUE
  )

  # fetch batch by batch; each result page lists ten records
  offset <- 0
  while (fetch_batch(offset, download_dir)) {
    offset <- offset + 10
  }

  # if successful, set progress bar to 100%
  completed <- TRUE
  setTimerProgressBar(pb, 250)
}