An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

36 行
941B

  1. source("../utils/helpers.R")
  2. source("config.R")
  3. library(rvest)
  4. library(stringr)
  # Turn a site-relative path into an absolute URL on the Bundestag site.
  # `path` is appended verbatim to the site root (which has no trailing
  # slash), so it should begin with "/".
  mk_absolute_url <- function(path) {
    site_root <- "https://www.bundestag.de"
    paste0(site_root, path)
  }
  # Build the absolute URL of the AJAX filter-list page for a given offset.
  # `offset` is formatted with "%d", so it has to be a whole number.
  mk_url <- function(offset) {
    relative_path <- sprintf(
      "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d",
      offset
    )
    # `%$%` is not defined in this file; presumably it applies the left-hand
    # function to the right-hand value -- confirm in the sourced helpers.
    mk_absolute_url %$% relative_path
  }
  # Download a single protocol file into the download directory.
  #
  # path: site-relative path of the file; made absolute via mk_absolute_url().
  # name: file name under which the download is stored.
  #
  # DOWNLOAD_DIR is not defined in this file (presumably set by the sourced
  # config -- confirm). It is concatenated with `name` without a separator,
  # so it has to end with a path separator.
  #
  # The download call is handed to `try`, which is meant to keep a single
  # failed download from aborting the whole run.
  download_protocol <- function(path, name) {
    destination <- paste0(DOWNLOAD_DIR, name)
    # mode = "wb" stores the file byte-for-byte; the default text mode
    # rewrites line endings on Windows.
    # `%$%` is defined outside this file; presumably `f %$% x` calls f(x).
    try %$% download.file(
      mk_absolute_url(path),
      destination,
      quiet = TRUE,
      mode = "wb"
    )
  }
  # Fetch one page of the protocol list and download every protocol it links.
  #
  # offset: passed to mk_url() to select the list page.
  # Returns TRUE if the page contained at least one protocol link, otherwise
  # FALSE.
  fetch_batch <- function(offset) {
    page_source <- as.character(read_html(mk_url(offset)))
    # Column 1 holds the full matched path, column 2 the captured file name.
    links <- str_match_all(
      page_source,
      "/resource/blob/.*?/([0-9]*-data\\.xml)"
    )[[1]]
    mapply(download_protocol, links[, 1], links[, 2])
    length(links) > 0
  }
  24. # TODO: error handling
  25. # - what if: page not reachable
  26. # - wrong format, etc.
  # Download all protocols by requesting list pages until one has no links.
  # The offset starts at 0 and advances by 10 after every non-empty page.
  # Errors raised by fetch_batch() are not caught here and propagate to the
  # caller.
  fetch_all <- function() {
    offset <- 0
    repeat {
      if (!fetch_batch(offset)) {
        break
      }
      offset <- offset + 10
    }
  }