An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

80 lines
2.6KB

  # Turn a site-relative path into an absolute bundestag.de URL.
  #
  # path: character; appended verbatim to the site origin, so it has to bring
  #   its own leading slash.
  # Returns the resulting URL(s) as character.
  mk_absolute_url <- function(path) {
    site_origin <- "https://www.bundestag.de"
    paste0(site_origin, path)
  }
  # Build the URL of one page of the open-data filter list.
  #
  # offset: numeric; interpolated into the `offset` query parameter with "%d",
  #   so it has to be whole-valued.
  # Returns the absolute URL as character.
  mk_url <- function(offset) {
    # Plain function application instead of the `%$%` operator: that operator
    # is not defined in this file, and magrittr's `%$%` (exposition pipe)
    # cannot take a function on its left-hand side.
    list_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    mk_absolute_url(list_path)
  }
  # Download one record and advance the shared progress bar.
  #
  # path: site-relative path of the file to download.
  # name: file name the download is stored under.
  # download_dir: target directory; joined to `name` by plain concatenation,
  #   so it has to end with a path separator.
  #
  # Reads and updates the variables `pb` and `progress` through `<<-`; they
  # must exist before this function is called.
  download_protocol <- function(path, name, download_dir) {
    target <- paste0(download_dir, name)
    # Best effort: try() reports a failed download but lets the remaining
    # downloads continue. Called directly rather than through `%$%`, which is
    # not defined in this file.
    try(download.file(mk_absolute_url(path), target, quiet = TRUE))
    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
  # Download every record linked from one page of the filter list.
  #
  # offset: numeric; page offset handed to mk_url().
  # download_dir: directory handed through to download_protocol().
  # Returns TRUE if the page contained at least one record link, else FALSE.
  fetch_batch <- function(offset, download_dir) {
    stopifnot("Offset must be numeric" = is.numeric(offset))

    page_source <- as.character(rvest::read_html(mk_url(offset)))

    # Column 1 holds the full matched path, column 2 the captured file name.
    hits <- str_match_all(
      page_source,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    mapply(
      download_protocol,
      hits[, 1],
      hits[, 2],
      MoreArgs = list(download_dir = download_dir)
    )

    length(hits) > 0
  }
  26. # TODO: error handling
  27. # - what if: page not reachable
  28. # - wrong format, etc.
  #' Download available records
  #'
  #' This fetches all available records of the 19th legislative period of the
  #' German Bundestag.
  #'
  #' @param download_dir character. Directory the records are written to. A
  #'   trailing path separator is appended if it is missing.
  #' @param create logical. If `TRUE`, a missing `download_dir` is created
  #'   (including missing parent directories). If `FALSE` (the default), a
  #'   missing directory is an error.
  #'
  #' @return The value of the final `setTimerProgressBar()` call. The function
  #'   is called for its side effect of writing the records to `download_dir`.
  #'
  #' @export
  fetch_all <- function(download_dir = "records/", create = FALSE) {
    stopifnot(
      "download_dir must be a single character string" =
        is.character(download_dir) && length(download_dir) == 1L &&
          !is.na(download_dir)
    )

    # File paths are later built by plain concatenation of directory and file
    # name, so the directory path has to end with a separator.
    if (!endsWith(download_dir, .Platform$file.sep)) {
      download_dir <- paste0(download_dir, .Platform$file.sep)
    }

    if (file.access(download_dir, mode = 0) == -1) {
      if (!create) {
        stop("Directory does not exist. Use create = TRUE if you wish to create the directory.")
      }
      # dir.create() reports failure as a warning, so errors and warnings are
      # routed to the same handler. recursive = TRUE lets nested paths such as
      # "data/records/" be created in one go.
      tryCatch(
        dir.create(download_dir, recursive = TRUE),
        error = stop_dir_not_creatable,
        warning = stop_dir_not_creatable
      )
    } else if (file.access(download_dir, mode = 2) == -1) {
      stop("Directory exists, but is not writeable.")
    }

    cat("Fetching all available records from bundestag.de. This may take a while ...\n")

    # The progress bar and its counter are assigned with `<<-` because
    # download_protocol() reads and updates them from outside this function.
    pb <<- timerProgressBar(min = 0, max = 250, width = 40, char = "+")
    progress <<- 0
    # Close the progress bar on exit, including exits caused by an error.
    on.exit(
      {
        close(pb)
        cat("Done.\n")
      },
      add = TRUE
    )

    # Advance the offset by 10 until a batch yields no record links.
    offset <- 0
    while (fetch_batch(offset, download_dir)) {
      offset <- offset + 10
    }

    # All batches processed: force the progress bar to 100%.
    setTimerProgressBar(pb, 250)
  }
  # Condition handler for a failed directory creation: aborts with a fixed,
  # user-facing message.
  #
  # cond: the condition (error or warning) raised while creating the directory.
  #
  # dir.create() reports failure as a warning. Passing that object straight to
  # stop() would signal a condition that still has class "warning", which
  # error handlers further up do not match. A genuine error condition is built
  # instead.
  stop_dir_not_creatable <- function(cond) {
    # The call of the original condition is kept, so the error is currently
    # attributed to dir.create(download_dir).
    # TODO: decide whether it should point at fetch_all(...) instead.
    stop(simpleError(
      "Directory does not exist and can't be created. Probably because the path is not writeable.",
      call = conditionCall(cond)
    ))
  }