An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

64 Zeilen
1.9KB

  # Turn a site-relative path into an absolute bundestag.de URL.
  # `path` is appended verbatim to the host; no separator is inserted.
  mk_absolute_url <- function(path) {
    paste0("https://www.bundestag.de", path)
  }
  # Build the absolute URL of the open-data filter list page at `offset`.
  # `offset` is formatted with "%d", so it has to be a whole number.
  mk_url <- function(offset) {
    mk_absolute_url(
      sprintf(
        "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
        offset
      )
    )
  }
  # Download one record file and advance the shared progress bar.
  #
  # path:         site-relative path of the file, resolved via mk_absolute_url()
  # name:         file name the download is stored under
  # download_dir: target directory; it is concatenated with `name` as-is, so
  #               it has to end with a file separator
  download_protocol <- function(path, name, download_dir) {
    fp <- paste0(download_dir, name)
    # best effort: a failed download is reported by try() but does not abort
    try(download.file(mk_absolute_url(path), fp, quiet = TRUE))
    # `progress` and `pb` are not local to this function; they are looked up
    # (and `progress` is updated) in an enclosing scope
    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
  # Download every record linked on the list page at `offset`.
  #
  # offset:       numeric offset into the record list, passed to mk_url()
  # download_dir: directory handed on to download_protocol()
  #
  # Returns TRUE if the page contained at least one record link, FALSE
  # otherwise.
  fetch_batch <- function(offset, download_dir) {
    stopifnot("Offset must be numeric" = is.numeric(offset))
    page <- as.character(rvest::read_html(mk_url(offset)))
    # one row per link: column 1 is the full match (the path),
    # column 2 the captured file name
    paths <- str_match_all(
      page,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]
    mapply(
      download_protocol,
      paths[, 1],
      paths[, 2],
      MoreArgs = list(download_dir = download_dir)
    )
    nrow(paths) > 0
  }
  # TODO: error handling
  #   - what if the page is not reachable?
  #   - what if the response has the wrong format, etc.?
  #' Download available records
  #'
  #' This fetches all available records of the 19th legislative period of the
  #' German Bundestag. Records are fetched batch by batch until a batch
  #' contains no record links.
  #'
  #' @param download_dir character; directory the records are stored in.
  #' @param create logical; if `TRUE`, the directory given in `download_dir`
  #'   is created.
  #'
  #' @export
  fetch_all <- function(download_dir = "inst/records/", create = FALSE) {
    # append file separator if needed
    download_dir <- make_directory_path(download_dir)
    check_directory(download_dir, create)
    cat("Fetching all available records from bundestag.de. This may take a while ...\n")

    # the progress bar and its counter are assigned with `<<-` because
    # download_protocol() reads and updates them from outside this function
    pb <<- timerProgressBar(min = 0, max = 250, width = 40, char = "+")
    progress <<- 0
    # close progress bar on exit (also on error); add = TRUE keeps any
    # previously registered exit handlers intact
    on.exit(
      {
        close(pb)
        cat("Done.\n")
      },
      add = TRUE
    )

    # fetch batch by batch, advancing the offset by 10 until a batch is empty
    offset <- 0
    while (fetch_batch(offset, download_dir)) {
      offset <- offset + 10
    }

    # if successful, set progressbar to 100%
    setTimerProgressBar(pb, 250)
  }