|
|
@@ -0,0 +1,33 @@ |
|
|
|
|
|
source("../utils/helpers.R") |
|
|
|
|
|
library(RCurl) |
|
|
|
|
|
library(stringr) |
|
|
|
|
|
|
|
|
|
|
|
# Target directory for downloaded files, relative to the working directory.
# It must exist before download.file() can write into it.
DOWNLOAD_DIR <- "../data/"
|
|
|
|
|
|
|
|
|
|
|
#' Turn a server-relative path into an absolute bundestag.de URL
#'
#' @param path Character vector of paths; each is appended verbatim to the
#'   host prefix (no separator is inserted).
#' @return Character vector of absolute URLs.
mk_absolute_url <- function(path) {
  host <- "https://www.bundestag.de"
  paste0(host, path)
}
|
|
|
|
|
|
|
|
|
|
|
#' Build the URL of one page of the open-data filter list
#'
#' @param offset Number substituted into the `offset` query parameter; it is
#'   formatted with `%d`, so it must be integer-valued.
#' @return The absolute URL produced by mk_absolute_url().
mk_url <- function(offset) {
  endpoint <- "/ajax/filterlist/de/services/opendata/543410-543410?offset=%d"
  # NOTE(review): `%$%` is not defined in this file; presumably it comes from
  # ../utils/helpers.R and applies the left-hand function to the right-hand
  # value -- confirm.
  mk_absolute_url %$% sprintf(endpoint, offset)
}
|
|
|
|
|
|
|
|
|
|
|
#' Download one protocol file into DOWNLOAD_DIR
#'
#' @param path Server-relative path of the file; it is expanded with
#'   mk_absolute_url().
#' @param name File name under which the download is stored inside
#'   DOWNLOAD_DIR.
#' @return The value of the `try`-wrapped download.file() call.
#'   NOTE(review): this relies on `%$%` (not defined in this file, presumably
#'   function application from ../utils/helpers.R) -- confirm.
download_protocol <- function(path, name) {
  # download.file() cannot write into a missing directory, and the failure
  # would be swallowed by try() below, so make sure the target exists first.
  # showWarnings = FALSE keeps this quiet when the directory is already there.
  dir.create(DOWNLOAD_DIR, showWarnings = FALSE, recursive = TRUE)

  fp <- paste0(DOWNLOAD_DIR, name)

  # Best effort: a failed download must not abort the remaining ones.
  try %$% download.file(mk_absolute_url(path), fp, quiet = TRUE)
}
|
|
|
|
|
|
|
|
|
|
|
#' Fetch one listing page and download every protocol file it links to
#'
#' @param offset Offset handed to mk_url() to select the listing page.
#' @return `TRUE` if the page contained at least one matching link,
#'   `FALSE` otherwise.
fetch_batch <- function(offset) {
  listing <- getURL(mk_url(offset))

  # Column 1 of the match matrix is the full link path, column 2 the captured
  # "<digits>-data.xml" file name.
  link_pattern <- "/resource/blob/.*?/([0-9]*-data\\.xml)"
  hits <- str_match_all(listing, link_pattern)[[1]]

  # Called for its side effect only; the collected results are discarded.
  mapply(download_protocol, hits[, 1], hits[, 2])

  length(hits) > 0
}
|
|
|
|
|
|
|
|
|
|
|
# TODO: error handling
# - what if the page is not reachable?
# - what if the response has an unexpected format, etc.?

#' Download all protocol files by walking the listing page by page
#'
#' Starts at offset 0 and advances in steps of 10 until fetch_batch()
#' reports a page without any matching link.
#'
#' @return `NULL`, invisibly; the function is called for its side effects.
fetch_all <- function() {
  offset <- 0
  repeat {
    if (!fetch_batch(offset)) {
      break
    }
    offset <- offset + 10
  }
}