An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 line
1.9KB

  # Turn a site-relative path into a full bundestag.de URL by prefixing the
  # host. `path` is pasted on as-is, so it is expected to start with "/".
  mk_absolute_url <- function(path) {
    base_url <- "https://www.bundestag.de"
    paste0(base_url, path)
  }
  # Build the absolute URL of the open-data filter list page that starts at
  # the given result offset.
  #
  # offset: number inserted into the query string via "%d".
  mk_url <- function(offset) {
    endpoint <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # NOTE(review): `%$%` is not defined in this part of the file; it is
    # presumably the package's function-application operator -- confirm.
    mk_absolute_url %$% endpoint
  }
  # Download a single protocol file and advance the shared progress bar.
  #
  # path:         site-relative path of the file (expanded with
  #               mk_absolute_url()).
  # name:         file name the download is stored under.
  # download_dir: target directory; it is pasted in front of `name` as-is,
  #               so it must already end with a file separator.
  #
  # A failed download does not abort the run: the error is turned into a
  # warning that names the affected file, and processing continues.
  #
  # Relies on `pb` and `progress` existing outside this function (fetch_all()
  # assigns both with `<<-`); `progress` is incremented once per call,
  # whether or not the download succeeded.
  download_protocol <- function(path, name, download_dir) {
    fp <- paste0(download_dir, name)

    tryCatch(
      download.file(mk_absolute_url(path), fp, quiet = TRUE),
      error = function(e) {
        warning(
          "Failed to download ", name, ": ", conditionMessage(e),
          call. = FALSE
        )
      }
    )

    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
  # Fetch one page of the record listing and download every protocol file
  # linked on it.
  #
  # offset:       numeric result offset of the listing page to request.
  # download_dir: directory handed through to download_protocol().
  #
  # Returns TRUE if the page contained at least one protocol link, FALSE
  # otherwise.
  fetch_batch <- function(offset, download_dir) {
    stopifnot("Offset must be numeric" = is.numeric(offset))

    listing <- as.character(rvest::read_html(mk_url(offset)))

    # column 1 holds the full matched path, column 2 the captured file name
    links <- str_match_all(
      listing,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    for (row in seq_len(nrow(links))) {
      download_protocol(
        links[row, 1],
        links[row, 2],
        download_dir = download_dir
      )
    }

    length(links) > 0
  }
  # TODO: error handling
  # - what if the page is not reachable?
  # - what if the response has the wrong format, etc.?
  #' Download available records
  #'
  #' Fetches all available records of the 19th legislative period of the
  #' German Bundestag from bundestag.de and stores them in `download_dir`.
  #' The record listing is requested in steps of 10 until a page without any
  #' record links is returned.
  #'
  #' @param download_dir character. Directory the records are written to.
  #' @param create bool. If `TRUE`, the directory given in `download_dir` is
  #'   created.
  #'
  #' @return Called for its side effects (downloaded files, progress output);
  #'   the value of the final `setTimerProgressBar()` call is returned.
  #'
  #' @export
  fetch_all <- function(download_dir = "inst/records/", create = FALSE) {
    # append file separator if needed
    download_dir <- make_directory_path(download_dir)
    check_directory(download_dir, create)

    cat("Fetching all available records from bundestag.de. This may take a while ...\n")

    # Upper bound for the progress bar; the real number of records is not
    # known before every listing page has been visited.
    progress_max <- 250

    # download_protocol() reads and updates `pb` and `progress`, which is why
    # both are assigned with `<<-` instead of being kept local.
    pb <<- timerProgressBar(min = 0, max = progress_max, width = 40, char = "+")
    progress <<- 0

    # Always close the progress bar (also on error), but only report
    # completion when every batch was actually processed.
    finished <- FALSE
    on.exit(
      {
        close(pb)
        if (finished) cat("Done.\n")
      },
      add = TRUE
    )

    # fetch batch by batch until a page yields no more records
    offset <- 0
    while (fetch_batch(offset, download_dir)) {
      offset <- offset + 10
    }

    # if successful, set progress bar to 100%
    finished <- TRUE
    setTimerProgressBar(pb, progress_max)
  }