An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

36 rindas
941B

  1. source("../utils/helpers.R")
  2. source("config.R")
  3. library(rvest)
  4. library(stringr)
  #' Turn a site-relative path into an absolute bundestag.de URL.
  #'
  #' @param path Character vector of paths, each expected to start with "/".
  #' @return Character vector with the host prepended to every element.
  mk_absolute_url <- function(path) {
    host <- "https://www.bundestag.de"
    paste0(host, path)
  }
  #' Build the absolute URL of the open-data filter list for a given offset.
  #'
  #' @param offset Whole number substituted into the `offset` query parameter.
  #' @return The absolute URL as a character string.
  mk_url <- function(offset) {
    list_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # NOTE(review): `%$%` is not defined in this file; presumably it is a
    # function-application helper from ../utils/helpers.R -- confirm.
    mk_absolute_url %$% list_path
  }
  #' Download one protocol file into the download directory.
  #'
  #' A failed download does not abort the caller: the failure is reported as a
  #' warning naming the affected file, and any partially written file is
  #' removed.
  #'
  #' @param path Site-relative path of the file on www.bundestag.de.
  #' @param name File name to store the download under. It is appended to
  #'   `DOWNLOAD_DIR` with `paste0()`, so `DOWNLOAD_DIR` must end with a path
  #'   separator (presumably defined in config.R -- confirm).
  #' @return Invisibly, the result of the guarded `download.file()` call: its
  #'   status code on success, or a "try-error" object on failure.
  download_protocol <- function(path, name) {
    fp <- paste0(DOWNLOAD_DIR, name)
    # mode = "wb" keeps the file byte-identical to what the server sent,
    # regardless of platform.
    result <- try(
      download.file(mk_absolute_url(path), fp, quiet = TRUE, mode = "wb"),
      silent = TRUE
    )
    if (inherits(result, "try-error")) {
      # A failed transfer may leave a truncated file behind; drop it so later
      # processing never picks up a broken protocol.
      unlink(fp)
      warning(
        sprintf(
          "Failed to download '%s': %s",
          name,
          conditionMessage(attr(result, "condition"))
        ),
        call. = FALSE
      )
    }
    invisible(result)
  }
  #' Download every protocol linked on one page of the filter list.
  #'
  #' Reads the page for `offset`, extracts all links to `*-data.xml` blobs
  #' from its markup and downloads each of them via `download_protocol()`.
  #'
  #' @param offset Offset passed on to `mk_url()`.
  #' @return `TRUE` if the page contained at least one protocol link,
  #'   `FALSE` otherwise.
  fetch_batch <- function(offset) {
    page_source <- as.character(read_html(mk_url(offset)))
    matches <- str_match_all(
      page_source,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )
    # Single input string, so only the first match matrix is relevant:
    # column 1 is the full matched path, column 2 the captured file name.
    paths <- matches[[1]]
    mapply(download_protocol, paths[, 1], paths[, 2])
    nrow(paths) > 0
  }
  # TODO: error handling
  # - what if the page is not reachable?
  # - what if the response has an unexpected format, etc.?

  #' Download all protocols by walking through the filter list page by page.
  #'
  #' Starts at offset 0 and advances in steps of 10 until `fetch_batch()`
  #' reports a page without protocol links.
  #'
  #' @return `NULL`, invisibly; called for its side effects.
  fetch_all <- function() {
    offset <- 0
    repeat {
      if (!fetch_batch(offset)) {
        break
      }
      # NOTE(review): step of 10 presumably mirrors the server's page size.
      offset <- offset + 10
    }
  }