|
# Turn a site-relative path into an absolute bundestag.de URL.
# `path` is appended to the host verbatim, so it should start with "/".
mk_absolute_url <- function(path) {
  host <- "https://www.bundestag.de"
  paste0(host, path)
}
-
# Build the absolute URL of the open-data filter list starting at `offset`.
# `offset` is formatted with "%d", so it must be a whole number.
mk_url <- function(offset) {
  listing_path <- sprintf(
    "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
    offset
  )
  # Call the helper directly instead of going through `%$%`: that operator
  # name is also magrittr's exposition pipe, which does not apply a function.
  # NOTE(review): assumes the project's `%$%` was plain function application.
  mk_absolute_url(listing_path)
}
-
# Download one record file into `download_dir` and advance the progress bar.
#
# path:         site-relative path of the file (expanded by mk_absolute_url()).
# name:         file name under which the download is stored.
# download_dir: target directory; it is pasted directly in front of `name`,
#               so it must already end with a path separator.
#
# A failing download is reported as a warning and does not abort the run.
# Uses `pb` and `progress` as free variables; fetch_all() creates them.
download_protocol <- function(path, name, download_dir) {
  fp <- paste0(download_dir, name)
  tryCatch(
    download.file(mk_absolute_url(path), fp, quiet = TRUE),
    error = function(e) {
      # Best effort: keep going with the remaining files, but say which one
      # could not be fetched.
      warning("Failed to download ", name, ": ", conditionMessage(e),
              call. = FALSE)
    }
  )
  # Count the file as processed whether or not the download succeeded.
  progress <<- progress + 1
  setTimerProgressBar(pb, progress)
}
-
# Download every record file linked from the listing page at `offset`.
#
# offset:       a single, non-missing number passed on to mk_url().
# download_dir: directory handed to download_protocol() for each file.
#
# Returns TRUE if the page linked at least one record file and FALSE
# otherwise; fetch_all() keeps requesting batches while this is TRUE.
# Stops with an error if the listing page cannot be read.
fetch_batch <- function(offset, download_dir) {
  stopifnot(
    "Offset must be numeric" = is.numeric(offset),
    "Offset must be a single non-missing value" =
      length(offset) == 1 && !is.na(offset)
  )

  url <- mk_url(offset)
  page <- tryCatch(
    rvest::read_html(url),
    error = function(e) {
      # Re-raise with the URL so the caller can tell which request failed.
      stop("Could not fetch record listing ", url, ": ", conditionMessage(e),
           call. = FALSE)
    }
  )

  # One row per link: column 1 is the full matched path, column 2 the
  # captured file name ("<digits>-data.xml").
  paths <- str_match_all(
    as.character(page),
    "/resource/blob/.*?/([0-9]*-data\\.xml)"
  )[[1]]

  # Side effects only, so a plain loop instead of collecting mapply() results.
  for (i in seq_len(nrow(paths))) {
    download_protocol(paths[i, 1], paths[i, 2], download_dir)
  }

  nrow(paths) > 0
}
-
# TODO: error handling
# - what if the page is not reachable?
# - what if the response has the wrong format, etc.?
-
#' Download available records
#'
#' Fetches all available records of the 19th legislative period of the German
#' Bundestag from bundestag.de. Records are requested batch by batch (the
#' offset advances by 10 per request) until a batch contains no records.
#'
#' @param download_dir character. Directory the records are written to.
#' @param create logical. If `TRUE`, the directory given in `download_dir`
#'   is created.
#'
#' @return Invisibly, the value of the final progress-bar update. The function
#'   is called for its side effect of downloading files.
#'
#' @export
fetch_all <- function(download_dir = "inst/records/", create = FALSE) {
  # append file separator if needed
  download_dir <- make_directory_path(download_dir)

  check_directory(download_dir, create)
  cat("Fetching all available records from bundestag.de. This may take a while ...\n")

  # Upper bound of the progress bar.
  # NOTE(review): presumably the expected number of records -- confirm.
  progress_max <- 250

  # download_protocol() uses `pb` and `progress` as free variables, hence the
  # super-assignment here.
  pb <<- timerProgressBar(min = 0, max = progress_max, width = 40, char = "+")
  progress <<- 0

  # Always close the progress bar, but only report "Done." when every batch
  # was fetched without an error.
  finished <- FALSE
  on.exit({
    close(pb)
    if (finished) cat("Done.\n")
  }, add = TRUE)

  # fetch batch by batch until a batch comes back empty
  offset <- 0
  while (fetch_batch(offset, download_dir)) {
    offset <- offset + 10
  }

  # if successful, set progressbar to 100%
  bar_state <- setTimerProgressBar(pb, progress_max)
  finished <- TRUE
  invisible(bar_state)
}
|