An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

37 rindas
1002B

  1. source("../utils/helpers.R")
  2. source("config.R")
  3. library(rvest)
  4. library(stringr)
  # Turn a site-relative path into an absolute URL on www.bundestag.de.
  #
  # path: character vector of paths (callers in this file pass values that
  #       begin with "/"); the host is prepended to each element unchanged.
  # Returns a character vector of absolute URLs.
  mk_absolute_url <- function(path) {
    host <- "https://www.bundestag.de"
    paste0(host, path)
  }
  # Build the absolute URL of the open-data filter list page for `offset`.
  #
  # offset: number substituted for the `%d` placeholder, so it has to be
  #         whole-valued.
  # Returns the result of handing the formatted path to `mk_absolute_url`.
  #
  # NOTE(review): `%$%` is not defined in this file; presumably it comes from
  # helpers.R and applies the left-hand function to the right-hand value --
  # confirm.
  mk_url <- function(offset) {
    endpoint <- "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d"
    relative <- sprintf(endpoint, offset)
    mk_absolute_url %$% relative
  }
  # Download a single protocol file into DOWNLOAD_DIR.
  #
  # path: site-relative path of the file; made absolute via mk_absolute_url.
  # name: file name appended to DOWNLOAD_DIR to form the destination path.
  #       The two are joined with paste0, i.e. without a separator, so
  #       DOWNLOAD_DIR has to end with one itself.
  #
  # The download is routed through `try`, so a failing download does not
  # abort the caller. Returns whatever the `try %$% ...` expression yields.
  #
  # NOTE(review): `%$%` is not defined in this file; presumably it comes from
  # helpers.R and applies the left-hand function to the right-hand value --
  # confirm.
  download_protocol <- function(path, name) {
    fp <- paste0(DOWNLOAD_DIR, name)
    # Spell out TRUE: `T` is an ordinary variable and can be rebound by any
    # sourced file, which would silently change the `quiet` flag.
    try %$% download.file(mk_absolute_url(path), fp, quiet = TRUE)
  }
  # Fetch one page of the protocol listing and download every protocol file
  # linked on it.
  #
  # offset: a single finite, whole-valued number identifying the page; it is
  #         formatted into the listing URL by mk_url.
  # Returns TRUE when at least one protocol link was found on the page,
  # FALSE otherwise (callers use this as the "keep paging" signal).
  # Raises an error if `offset` is invalid or the listing page cannot be read.
  fetch_batch <- function(offset) {
    stopifnot(
      "Offset must be numeric" = is.numeric(offset),
      # mk_url formats the offset with `%d`, and read_html expects one URL,
      # so reject vectors, NA/Inf and fractional values up front.
      "Offset must be a single whole number" =
        length(offset) == 1L && is.finite(offset) && offset == trunc(offset)
    )

    url <- mk_url(offset)
    page <- tryCatch(
      read_html(url),
      error = function(e) {
        # Still an error, but one that says which page failed.
        stop("Could not read batch listing at offset ", offset, ": ",
             conditionMessage(e), call. = FALSE)
      }
    )

    # One row per match: column 1 is the full resource path, column 2 the
    # captured file name. With no matches this is a zero-row matrix.
    paths <- str_match_all(
      as.character(page),
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    mapply(download_protocol, paths[, 1], paths[, 2])
    nrow(paths) > 0
  }
  25. # TODO: error handling
  26. # - what if: page not reachable
  27. # - wrong format, etc.
  # Page through the whole listing: start at offset 0 and advance by 10 for
  # as long as fetch_batch reports that the current page had content.
  # Called for its side effects; returns NULL invisibly.
  fetch_all <- function() {
    offset <- 0
    repeat {
      if (fetch_batch(offset)) {
        offset <- offset + 10
      } else {
        break
      }
    }
    invisible(NULL)
  }