An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

50 wiersze
1.5KB

  1. source("../utils/helpers.R")
  2. source("config.R")
  3. library(rvest)
  4. library(stringr)
  5. library(pbapply)
  # Turn a site-relative path into an absolute bundestag.de URL.
  #
  # path: character vector; each element is appended verbatim to the host.
  # Returns a character vector of URLs, one per element of `path`.
  mk_absolute_url <- function(path) {
    base_url <- "https://www.bundestag.de"
    paste0(base_url, path)
  }
  # Build the absolute URL of the open-data filter list page that starts at
  # the given result offset.
  #
  # offset: number inserted into the query string through the "%d" format.
  #
  # NOTE(review): `%$%` is not defined in this file; it presumably comes from
  # the sourced helpers and applies the left-hand function to the right-hand
  # value -- confirm.
  mk_url <- function(offset) {
    list_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    mk_absolute_url %$% list_path
  }
  # Download a single protocol file and advance the progress bar by one step.
  #
  # path: site-relative path of the file; made absolute with mk_absolute_url().
  # name: file name to store it under; appended verbatim to DOWNLOAD_DIR.
  #
  # A failed download does not abort the run: the error is turned into a
  # warning that names the affected file, and the progress bar still advances.
  # Reads the globals `pb` and `progress` and updates `progress` via `<<-`.
  download_protocol <- function(path, name) {
    fp <- paste0(DOWNLOAD_DIR, name)
    tryCatch(
      download.file(mk_absolute_url(path), fp, quiet = TRUE),
      error = function(e) {
        warning(
          "Failed to download ", name, ": ", conditionMessage(e),
          call. = FALSE
        )
      }
    )
    # count the attempt even when it failed, so the bar keeps moving
    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
  # Fetch one page of the protocol list and download every protocol it links.
  #
  # offset: single, non-missing number; position in the list at which the
  #   requested page starts.
  #
  # Returns TRUE when the page contained at least one protocol link and FALSE
  # otherwise, so callers can use the result as a "keep going" condition.
  # Errors raised while reading the page are not caught here.
  fetch_batch <- function(offset) {
    stopifnot(
      "Offset must be numeric" = is.numeric(offset),
      "Offset must be a single non-missing value" =
        length(offset) == 1L && !is.na(offset)
    )

    page_html <- mk_url(offset) %>%
      read_html() %>%
      as.character()

    # One row per match: column 1 is the full blob path, column 2 the captured
    # file name ("<digits>-data.xml").
    paths <- str_match_all(
      page_html,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    if (nrow(paths) == 0L) {
      return(FALSE)
    }

    for (i in seq_len(nrow(paths))) {
      download_protocol(paths[i, 1], paths[i, 2])
    }
    TRUE
  }
  28. # TODO: error handling
  29. # - what if: page not reachable
  30. # - wrong format, etc.
  # Download all protocols listed on bundestag.de, one list page at a time.
  #
  # Creates the globals `pb` (timer progress bar) and `progress` (counter),
  # which download_protocol() reads and updates. The progress bar is always
  # closed on exit; "Done." is printed only when every batch was fetched
  # without an error.
  fetch_all <- function() {
    cat("Fetching all available protocols from bundestag.de. This may take a while ...\n")

    # maximum of the progress bar; the bar is forced to this value at the end
    expected_total <- 250

    # create progress bar
    pb <<- timerProgressBar(min = 0, max = expected_total, width = 40, char = "+")
    progress <<- 0

    # close progress bar on exit (also on error), but only claim success
    # when the loop below ran to completion
    finished <- FALSE
    on.exit(
      {
        close(pb)
        if (finished) cat("Done.\n")
      },
      add = TRUE
    )

    # fetch batch by batch; the list is paged in steps of 10
    offset <- 0
    while (fetch_batch(offset)) {
      offset <- offset + 10
    }
    finished <- TRUE

    # if successful, set progressbar to 100%
    setTimerProgressBar(pb, expected_total)
  }