An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

80 lignes
2.6KB

  1. mk_absolute_url <- function(path) paste0("https://www.bundestag.de", path)
  2. mk_url <- function(offset) {
  3. mk_absolute_url %$% sprintf("/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
  4. offset)
  5. }
  6. download_protocol <- function(path, name, download_dir) {
  7. fp <- paste0(download_dir, name)
  8. try %$% download.file(mk_absolute_url(path), fp, quiet=T)
  9. progress <<- progress + 1
  10. setTimerProgressBar(pb, progress)
  11. }
  12. fetch_batch <- function(offset, download_dir) {
  13. stopifnot("Offset must be numeric" = is.numeric(offset))
  14. mk_url(offset) %>%
  15. rvest::read_html() %>%
  16. as.character() %>%
  17. str_match_all("/resource/blob/.*?/([0-9]*-data\\.xml)") %>%
  18. `[[`(1) ->
  19. paths
  20. mapply(download_protocol,
  21. paths[,1],
  22. paths[,2],
  23. MoreArgs=list(download_dir = download_dir))
  24. return(length(paths) > 0)
  25. }
# TODO: error handling
# - what if the page is not reachable?
# - what if the response has an unexpected format, etc.?
  29. #' Download available records
  30. #'
  31. #' This fetches all available records of the 19th legislative period of the german Bundestag.
  32. #'
  33. #' @param download_dir character
  34. #'
  35. #' @export
  36. fetch_all <- function(download_dir="records/", create=FALSE) {
  37. # check if download_dir path is a directory path
  38. if (str_sub(download_dir, -1) != .Platform$file.sep)
  39. download_dir <- str_c(download_dir, .Platform$file.sep)
  40. # check if download_dir exists
  41. if(file.access(download_dir, mode=0) == -1) {
  42. if (create) {
  43. tryCatch(dir.create(download_dir),
  44. error = stop_dir_not_creatable,
  45. warning = stop_dir_not_creatable)
  46. } else {
  47. stop("Directory does not exist. Use create = TRUE if you wish to create the directory.")
  48. }
  49. } else if (file.access(download_dir, mode=2) == -1) {
  50. stop("Directory exists, but is not writeable.")
  51. }
  52. cat("Fetching all available records from bundestag.de. This may take a while ...\n")
  53. # create progress bar
  54. pb <<- timerProgressBar(min=0, max=250, width=40, char="+")
  55. progress <<- 0
  56. # close progress bar on exit (also on error)
  57. on.exit({close(pb); cat("Done.\n")})
  58. # fetch batch by batch
  59. offset <- 0
  60. while(fetch_batch(offset, download_dir)) offset <- offset + 10
  61. # if successful, set progressbar to 100%
  62. setTimerProgressBar(pb, 250)
  63. }
  64. stop_dir_not_creatable <- function(cond) {
  65. # currently this has call: dir.create(download_dir)
  66. # do we want to change this to fetch_all(...) ?
  67. cond$message <- "Directory does not exist and can't be created. Probably because the path is not writeable."
  68. stop(cond)
  69. }