An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

37 wiersze
1002B

  1. source("../utils/helpers.R")
  2. source("config.R")
  3. library(rvest)
  4. library(stringr)
  # Turn a site-relative path into an absolute URL on www.bundestag.de.
  #
  # path: character vector of paths; each is appended verbatim to the host,
  #       so callers in this file pass paths that start with "/".
  # Returns a character vector of absolute URLs.
  mk_absolute_url <- function(path) {
    host <- "https://www.bundestag.de"
    paste0(host, path)
  }
  # Build the absolute URL of the open-data filter-list page that starts at
  # the given result offset.
  #
  # offset: number substituted into the "%d" placeholder of the query string.
  # Returns the value produced by handing the site-relative endpoint to
  # mk_absolute_url via `%$%`.
  mk_url <- function(offset) {
    endpoint <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # NOTE(review): `%$%` is not defined in this file (presumably it comes
    # from ../utils/helpers.R); it appears to apply the left-hand function to
    # the right-hand argument -- confirm.
    mk_absolute_url %$% endpoint
  }
  # Download a single protocol file into DOWNLOAD_DIR.
  #
  # path: site-relative path of the file; made absolute with mk_absolute_url.
  # name: file name to store the download under.
  # Returns whatever the `try`-wrapped download.file call yields.
  download_protocol <- function(path, name) {
    # NOTE(review): plain concatenation, so this assumes DOWNLOAD_DIR (defined
    # outside this file, presumably in config.R) ends with a path separator --
    # confirm.
    fp <- paste0(DOWNLOAD_DIR, name)
    # NOTE(review): `%$%` is not defined in this file (presumably it comes
    # from ../utils/helpers.R); it appears to apply `try` to the download call
    # so one failed download does not abort the whole batch -- confirm.
    # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
    try %$% download.file(mk_absolute_url(path), fp, quiet = TRUE)
  }
  # Fetch one page of the protocol listing and download every protocol file
  # linked on it.
  #
  # offset: a single, non-missing number; passed to mk_url to select the page.
  # Returns TRUE when the page contained at least one matching link and FALSE
  # otherwise, which is what fetch_all uses as its stop condition.
  # Stops with an error when `offset` is not numeric, or is not a single
  # non-missing value.
  fetch_batch <- function(offset) {
    stopifnot(
      "Offset must be numeric" = is.numeric(offset),
      # Reject vectors, zero-length input and NA here with a clear message
      # rather than letting them fail obscurely further down.
      "Offset must be a single non-missing value" =
        length(offset) == 1L && !is.na(offset)
    )
    # The page is treated as one string, so the first (only) element of the
    # str_match_all result is the match matrix: column 1 holds the full
    # "/resource/blob/..." path, column 2 the captured "<digits>-data.xml"
    # file name.
    paths <- mk_url(offset) %>%
      read_html() %>%
      as.character() %>%
      str_match_all("/resource/blob/.*?/([0-9]*-data\\.xml)") %>%
      `[[`(1)
    # Called for its side effect (one download per matched link); the
    # collected results are discarded.
    mapply(download_protocol, paths[, 1], paths[, 2])
    length(paths) > 0
  }
  25. # TODO: error handling
  26. # - what if: page not reachable
  27. # - wrong format, etc.
  # Download every available protocol by walking the listing page by page.
  #
  # Starts at offset 0 and advances in steps of 10 for as long as fetch_batch
  # reports that the current page contained links. Returns NULL invisibly.
  fetch_all <- function() {
    position <- 0
    repeat {
      has_links <- fetch_batch(position)
      if (!has_links) break
      position <- position + 10
    }
  }