An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

69 行
2.2KB

  # Turn a site-relative path (e.g. "/resource/...") into an absolute
  # bundestag.de URL by prefixing the site origin. Vectorised over `path`.
  mk_absolute_url <- function(path) {
    site_origin <- "https://www.bundestag.de"
    paste0(site_origin, path)
  }
  # Build the absolute URL of the open-data filter-list page that starts at
  # record index `offset`.
  #
  # `offset` is formatted with "%d", so it must be an integer-valued number.
  mk_url <- function(offset) {
    list_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # Call mk_absolute_url() directly rather than through `%$%`: that operator
    # is not defined in this part of the file, and magrittr's `%$%` is the
    # exposition pipe, not function application.
    mk_absolute_url(list_path)
  }
  # Download a single protocol file and advance the shared progress bar.
  #
  # path:         site-relative path of the file; made absolute with
  #               mk_absolute_url().
  # name:         file name to store the download under.
  # download_dir: target directory; it is concatenated with `name` as-is, so it
  #               must already end with a file separator.
  #
  # A failed download does not abort the run: it is reported as a warning and
  # the progress counter is advanced regardless.
  download_protocol <- function(path, name, download_dir) {
    fp <- paste0(download_dir, name)
    tryCatch(
      # mode = "wb" keeps the file byte-identical on every platform.
      download.file(mk_absolute_url(path), fp, quiet = TRUE, mode = "wb"),
      error = function(e) {
        warning(
          "Failed to download ", name, ": ", conditionMessage(e),
          call. = FALSE
        )
      }
    )
    # `progress` and `pb` are the globals that fetch_all() creates with `<<-`
    # before the first batch is fetched.
    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
  # Download every protocol linked on one page of the open-data filter list.
  #
  # offset:       numeric page offset, forwarded to mk_url().
  # download_dir: directory handed through to download_protocol().
  #
  # Returns TRUE when the page contained at least one protocol link and FALSE
  # otherwise; fetch_all() uses this as its loop condition.
  fetch_batch <- function(offset, download_dir) {
    stopifnot("Offset must be numeric" = is.numeric(offset))

    page_source <- as.character(rvest::read_html(mk_url(offset)))

    # One row per link: column 1 is the full matched path, column 2 the
    # captured file name.
    links <- str_match_all(
      page_source,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    for (row in seq_len(nrow(links))) {
      download_protocol(
        links[row, 1],
        links[row, 2],
        download_dir = download_dir
      )
    }

    nrow(links) > 0
  }
  # TODO: error handling
  #   - What if the page is not reachable?
  #   - What if the response has an unexpected format?
  #' Download available records
  #'
  #' Fetches all available records of the 19th legislative period of the German
  #' Bundestag from bundestag.de, page by page, and stores them in
  #' `download_dir`. A progress bar is shown while the download is running.
  #'
  #' @param download_dir A single character string naming the directory the
  #'   records are written to. A trailing file separator is appended if missing.
  #' @param create Forwarded to `check_directory()`; presumably controls whether
  #'   a missing `download_dir` is created (confirm against that helper).
  #'
  #' @return Called for its side effects (downloaded files, console output).
  #'
  #' @export
  fetch_all <- function(download_dir = "records/", create = FALSE) {
    stopifnot(
      "download_dir must be a single character string" =
        is.character(download_dir) && length(download_dir) == 1L &&
          !is.na(download_dir)
    )

    # download_protocol() concatenates directory and file name directly, so the
    # directory has to end with a file separator.
    if (!endsWith(download_dir, .Platform$file.sep)) {
      download_dir <- paste0(download_dir, .Platform$file.sep)
    }
    check_directory(download_dir, create)

    cat("Fetching all available records from bundestag.de. This may take a while ...\n")

    # download_protocol() advances the bar through these two globals, which is
    # why they are assigned with `<<-` instead of being kept local.
    pb <<- timerProgressBar(min = 0, max = 250, width = 40, char = "+")
    progress <<- 0

    # Always close the progress bar, but only report success when the loop
    # below actually ran to completion.
    finished <- FALSE
    on.exit(
      {
        close(pb)
        if (finished) cat("Done.\n")
      },
      add = TRUE
    )

    # Fetch page after page (10 entries per step) until a page has no records.
    offset <- 0
    while (fetch_batch(offset, download_dir)) {
      offset <- offset + 10
    }

    # All pages fetched: flag success and move the bar to its maximum.
    finished <- TRUE
    setTimerProgressBar(pb, 250)
  }
  # Condition handler: swap the incoming condition's message for a user-facing
  # explanation and re-signal the condition through stop().
  #
  # cond: a condition object; every field except `message` is kept as-is.
  stop_dir_not_creatable <- function(cond) {
    # Open question: the condition keeps its original call (currently
    # dir.create(download_dir)); should it point at fetch_all(...) instead?
    explanation <- "Directory does not exist and can't be created. Probably because the path is not writeable."
    cond[["message"]] <- explanation
    stop(cond)
  }