An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

36 lines
941B

  1. source("../utils/helpers.R")
  2. source("config.R")
  3. library(rvest)
  4. library(stringr)
  # Turn a site-relative path into an absolute URL on www.bundestag.de.
  #
  # path: character vector of paths; each is appended to the host as-is,
  #       so it should start with "/".
  # Returns a character vector of absolute URLs, one per element of `path`.
  mk_absolute_url <- function(path) {
    base_url <- "https://www.bundestag.de"
    paste0(base_url, path)
  }
  # Build the absolute URL of the open-data filter list page for `offset`.
  #
  # offset: whole number substituted into the `offset=` query parameter
  #         (formatted with "%d").
  # Returns whatever `mk_absolute_url %$% <path>` evaluates to.
  mk_url <- function(offset) {
    list_path_fmt <-
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d"
    # NOTE(review): `%$%` is not defined in this file (presumably it comes
    # from the sourced helpers); it looks like plain function application,
    # i.e. mk_absolute_url(<path>) -- confirm.
    mk_absolute_url %$% sprintf(list_path_fmt, offset)
  }
  # Download a single protocol file to the local download directory.
  #
  # path: site-relative path of the file; made absolute with
  #       mk_absolute_url().
  # name: file name appended to DOWNLOAD_DIR to form the destination path.
  # Returns the result of the `try %$% download.file(...)` expression.
  download_protocol <- function(path, name) {
    # DOWNLOAD_DIR is not defined in this file (presumably set in config.R).
    # paste0() inserts no separator, so this assumes DOWNLOAD_DIR ends with
    # a path separator -- TODO confirm.
    fp <- paste0(DOWNLOAD_DIR, name)
    # NOTE(review): `%$%` is not defined here; it appears to apply `try` to
    # the download call so that one failed download does not abort the
    # whole run -- confirm against the helper's definition.
    # `TRUE` is spelled out because `T` is an ordinary variable that can be
    # reassigned.
    try %$% download.file(mk_absolute_url(path), fp, quiet = TRUE)
  }
  # Fetch one page of the protocol listing and download every protocol file
  # linked from it.
  #
  # offset: listing offset passed on to mk_url().
  # Returns TRUE if the page contained at least one matching link, FALSE
  # otherwise.
  fetch_batch <- function(offset) {
    # Work on the page as a single string so links can be found by regex.
    page_source <- as.character(read_html(mk_url(offset)))

    # Match matrix for the page: column 1 holds the full matched path,
    # column 2 the captured "<digits>-data.xml" file name.
    paths <- str_match_all(
      page_source,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]

    mapply(download_protocol, paths[, 1], paths[, 2])

    length(paths) > 0
  }
  # TODO: add error handling, e.g. for
  # - the listing page not being reachable
  # - the page having an unexpected format, etc.
  # Download all protocols by walking the listing in steps of 10, starting
  # at offset 0, until fetch_batch() reports a page without matching links.
  fetch_all <- function() {
    offset <- 0
    repeat {
      if (!fetch_batch(offset)) {
        break
      }
      offset <- offset + 10
    }
  }