An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

72 lines
2.3KB

  # Turn a site-relative path into an absolute URL on www.bundestag.de.
  #
  # `path` is appended verbatim to the host (no separator is inserted), and
  # the function is vectorised over `path` through paste0().
  mk_absolute_url <- function(path) {
    host <- "https://www.bundestag.de"
    paste0(host, path)
  }
  # Build the absolute URL of one page of the open-data record listing.
  #
  # `offset` is substituted into the `offset` query parameter through a "%d"
  # conversion, so it has to be a whole number.
  mk_url <- function(offset) {
    listing_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # Call mk_absolute_url() directly instead of applying it through `%$%`:
    # base R has no such operator, and magrittr exports an exposition pipe
    # under the same name whose semantics are not function application.
    mk_absolute_url(listing_path)
  }
  # Download a single record and advance the shared progress bar.
  #
  # path          site-relative path of the record, resolved with
  #               mk_absolute_url()
  # name          file name to store the record under
  # download_dir  target directory; it is joined with `name` by plain
  #               concatenation, so it must already end in a path separator
  #
  # NOTE: `pb` and `progress` are not arguments. They are looked up (and
  # `progress` is updated with `<<-`) outside this function; fetch_all()
  # creates both before the first download.
  download_protocol <- function(path, name, download_dir) {
    target <- paste0(download_dir, name)
    # Best effort: try() reports a failed download but lets the remaining
    # records proceed. try() is called directly rather than through `%$%`,
    # and TRUE is spelled out because `T` is an ordinary, reassignable
    # variable.
    try(download.file(mk_absolute_url(path), target, quiet = TRUE))
    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
  # Download every record linked from one page of the listing.
  #
  # Reads the listing page for `offset`, extracts all links that point to a
  # "<digits>-data.xml" resource and hands each one to download_protocol().
  # Returns TRUE when the page contained at least one such link and FALSE
  # otherwise.
  fetch_batch <- function(offset, download_dir) {
    stopifnot("Offset must be numeric" = is.numeric(offset))

    page_source <- as.character(rvest::read_html(mk_url(offset)))

    # One row per link: column 1 holds the full matched path, column 2 the
    # captured file name.
    links <- str_match_all(
      page_source,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    for (i in seq_len(nrow(links))) {
      download_protocol(links[i, 1], links[i, 2], download_dir = download_dir)
    }

    length(links) > 0
  }
  # TODO: add error handling, e.g. for the cases that
  #   - the page is not reachable
  #   - the response has an unexpected format
  #' Download available records
  #'
  #' Fetches all available records of the 19th legislative period of the
  #' German Bundestag from bundestag.de and stores them in `download_dir`.
  #' The listing is read page by page, advancing the offset by 10 each time,
  #' until a page yields no more records.
  #'
  #' @param download_dir Path of the directory the records are saved in,
  #'   given as a single non-empty string. A trailing path separator is
  #'   appended if it is missing.
  #' @param create Logical. If `TRUE`, the directory given in `download_dir`
  #'   is created.
  #'
  #' @return Called for its side effect of writing the downloaded files; the
  #'   value is whatever the final progress-bar update returns.
  #'
  #' @export
  fetch_all <- function(download_dir = "data/records/", create = FALSE) {
    # Reject inputs the path handling below cannot deal with. In particular
    # an empty string would otherwise be turned into the filesystem root.
    stopifnot(
      "download_dir must be a single non-empty string" =
        is.character(download_dir) && length(download_dir) == 1L &&
          !is.na(download_dir) && nzchar(download_dir)
    )

    # make sure download_dir ends in a path separator
    if (str_sub(download_dir, -1) != .Platform$file.sep) {
      download_dir <- str_c(download_dir, .Platform$file.sep)
    }
    check_directory(download_dir, create)

    cat("Fetching all available records from bundestag.de. This may take a while ...\n")

    # create progress bar; it and its counter are assigned with `<<-` because
    # download_protocol() reads and updates them as free variables
    pb <<- timerProgressBar(min = 0, max = 250, width = 40, char = "+")
    progress <<- 0

    # close progress bar on exit (also on error)
    # NOTE(review): "Done." is therefore printed after a failed run as well.
    on.exit(
      {
        close(pb)
        cat("Done.\n")
      },
      add = TRUE
    )

    # fetch batch by batch until a page comes back without records
    offset <- 0
    while (fetch_batch(offset, download_dir)) {
      offset <- offset + 10
    }

    # if successful, set progressbar to 100%
    setTimerProgressBar(pb, 250)
  }
  # Condition handler for a failed directory creation.
  #
  # Overwrites the message of the caught condition `cond` with a user-facing
  # explanation and signals the condition again through stop(). Every other
  # field of `cond` is passed on untouched.
  stop_dir_not_creatable <- function(cond) {
    explanation <- "Directory does not exist and can't be created. Probably because the path is not writeable."
    # The condition keeps its original call (currently
    # dir.create(download_dir)); open question: should it point to
    # fetch_all(...) instead?
    cond[["message"]] <- explanation
    stop(cond)
  }