An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

47 lines
1.5KB

  # Turn a site-relative path into an absolute bundestag.de URL.
  #
  # path: character vector of site-relative paths; no separator is inserted,
  #       so each path is expected to start with "/".
  # Returns: character vector with "https://www.bundestag.de" prepended.
  mk_absolute_url <- function(path) {
    site_root <- "https://www.bundestag.de"
    paste0(site_root, path)
  }
  # Build the absolute URL of the open-data record list page at `offset`.
  #
  # offset: whole-numbered numeric; it is interpolated with "%d", so sprintf()
  #         raises an error for fractional values.
  # Returns: the list-page URL as produced by mk_absolute_url().
  mk_url <- function(offset) {
    list_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # Call the helper directly rather than through `%$%`: that operator is not
    # defined in this file, and magrittr's `%$%` is the exposition pipe
    # (with(lhs, rhs)), which does not apply a function to its right-hand side.
    mk_absolute_url(list_path)
  }
  # Download one record file and advance the shared progress bar.
  #
  # path:         site-relative path of the file (resolved via mk_absolute_url()).
  # name:         file name to store the download under.
  # download_dir: directory prefix; it is concatenated with `name` as-is, so it
  #               must end with a path separator (e.g. "records/").
  # Returns: the destination file path, invisibly.
  #
  # A failed download is reported as a warning and does not abort the caller,
  # so one bad file does not stop the remaining downloads.
  download_protocol <- function(path, name, download_dir) {
    destination <- paste0(download_dir, name)

    tryCatch(
      download.file(mk_absolute_url(path), destination, quiet = TRUE),
      error = function(e) {
        warning(
          "Failed to download ", name, ": ", conditionMessage(e),
          call. = FALSE
        )
      }
    )

    # `pb` and `progress` are shared state set up by the caller that owns the
    # progress bar. Skip the update when they are absent so this function can
    # also be used on its own.
    if (exists("pb") && exists("progress")) {
      progress <<- progress + 1
      setTimerProgressBar(pb, progress)
    }

    invisible(destination)
  }
  # Fetch one page of the record list and download every record linked on it.
  #
  # offset:       single non-missing number; position in the record list at
  #               which the requested page starts.
  # download_dir: directory prefix passed through to download_protocol().
  # Returns: TRUE if the page linked at least one record file, FALSE otherwise
  #          (callers use FALSE as the signal that the list is exhausted).
  # Raises: an error if `offset` is invalid or the list page cannot be read.
  fetch_batch <- function(offset, download_dir) {
    stopifnot(
      "Offset must be numeric" = is.numeric(offset),
      "Offset must be a single non-missing number" =
        length(offset) == 1 && !is.na(offset)
    )

    page_url <- mk_url(offset)
    page_html <- tryCatch(
      as.character(rvest::read_html(page_url)),
      error = function(e) {
        # Re-raise with the offset and URL so a failed run can be diagnosed.
        stop(
          "Could not read record list at offset ", offset,
          " (", page_url, "): ", conditionMessage(e),
          call. = FALSE
        )
      }
    )

    # One row per match: column 1 is the full resource path, column 2 is the
    # captured file name ("<digits>-data.xml").
    paths <- stringr::str_match_all(
      page_html,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    for (i in seq_len(nrow(paths))) {
      download_protocol(paths[i, 1], paths[i, 2], download_dir = download_dir)
    }

    nrow(paths) > 0
  }
  26. # TODO: error handling
  27. # - what if: page not reachable
  28. # - wrong format, etc.
  # Download all available records from bundestag.de into `download_dir`.
  #
  # download_dir: directory prefix for the downloaded files; it is concatenated
  #               with each file name, so it must end with a path separator.
  #               The directory is created if it does not exist yet.
  #
  # Progress is shown on a timer progress bar. The bar and its counter are
  # stored in the variables `pb` and `progress` via `<<-`, because
  # download_protocol() updates them from there.
  fetch_all <- function(download_dir = "records/") {
    expected_records <- 250 # upper bound used to scale the progress bar
    offset_step <- 10       # list pages are requested in steps of 10 records

    cat("Fetching all available records from bundestag.de. This may take a while ...\n")

    # Make sure the target directory exists; otherwise every download fails.
    # Trailing separators are stripped only for the existence check/creation.
    target_dir <- sub("[/\\\\]+$", "", download_dir)
    if (nzchar(target_dir) && !dir.exists(target_dir)) {
      dir.create(target_dir, recursive = TRUE)
    }

    # create progress bar
    pb <<- timerProgressBar(
      min = 0, max = expected_records, width = 40, char = "+"
    )
    progress <<- 0

    # close progress bar on exit (also on error)
    on.exit(
      {
        close(pb)
        cat("Done.\n")
      },
      add = TRUE
    )

    # fetch batch by batch until a page yields no more records
    offset <- 0
    while (fetch_batch(offset, download_dir)) {
      offset <- offset + offset_step
    }

    # if successful, set progressbar to 100%
    setTimerProgressBar(pb, expected_records)
  }